From 4d826e4c32a0e86dcea71a5a067df96db72ba282 Mon Sep 17 00:00:00 2001 From: vsoch Date: Sun, 7 Jan 2024 21:37:58 -0700 Subject: [PATCH 01/28] docs: add section to README for developer Also tweak some docstrings for fluence. Signed-off-by: vsoch --- README.md | 10 ++++++++++ sig-scheduler-plugins/pkg/fluence/fluence.go | 10 +++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 968c2dc..0e2b4c9 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ For background on the Flux framework and the Fluxion scheduler, you can take a l - To deploy our pre-built images, go to [Deploy](#deploy) - To build your own images, go to [Setup](#setup) + - To learn about repository organization, see [Developer](#developer) ### Deploy @@ -431,6 +432,15 @@ For the above, I found [this page](https://kubernetes.io/docs/tasks/extend-kuber Finally, note that we also have a more appropriate example with jobs under [examples/test_example](examples/test_example). It's slightly more sane because it uses Job, and jobs are expected to complete (whereas pods are not and will get into crash loop backoffs, etc). For example of how to programmatically interact with the job pods and check states, events, see the [test.sh](.github/test.sh) script. +### Developer + +You can see [deploy](#deploy) for instructions on how to do a custom deployment. If you are looking to develop: + + - [src](src): includes source code for fluence + - [sig-scheduler-plugins](sig-scheduler-plugins): includes assets (manifests and Go files) that are intended to be added to the kubernetes-sigs/scheduler-plugins upstream repository before build + - *upstream*: the default name this upstream is cloned to when you do a make build command. + +Note that the clone of the repository and copying of files to the correct locations is all automated through the [Makefile](Makefile). This section exists to alert you to where to look for the different assets defined above. 
## Papers diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index fec0a35..a705e2c 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -62,7 +62,7 @@ func (f *Fluence) Name() string { return Name } -// initialize and return a new Flux Plugin +// Initialize and return a new Fluence Custom Scheduler Plugin // Note from vsoch: seems analogous to: // https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/pkg/coscheduling/coscheduling.go#L63 func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { @@ -82,9 +82,9 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { klog.Info("Create generic pod informer") scheme := runtime.NewScheme() - _ = clientscheme.AddToScheme(scheme) - _ = v1.AddToScheme(scheme) - _ = v1alpha1.AddToScheme(scheme) + clientscheme.AddToScheme(scheme) + v1.AddToScheme(scheme) + v1alpha1.AddToScheme(scheme) client, err := client.New(handle.KubeConfig(), client.Options{Scheme: scheme}) if err != nil { return nil, err @@ -121,7 +121,7 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { return nil, err } - klog.Info("Fluence starts") + klog.Info("Fluence start") return f, nil } From 1026549c7fb96295b0cb205d63b236f5d0493691 Mon Sep 17 00:00:00 2001 From: vsoch Date: Sun, 7 Jan 2024 21:42:44 -0700 Subject: [PATCH 02/28] build: add commands to make for clone and update Problem: the local upstream might get out of date Solution: provide an easy make update to pull from it. Signed-off-by: vsoch --- Makefile | 11 ++++++++--- README.md | 12 +++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index dc87d50..e20f706 100644 --- a/Makefile +++ b/Makefile @@ -10,15 +10,20 @@ SIDECAR_IMAGE ?= fluence-sidecar:latest CONTROLLER_IMAGE ?= fluence-controller SCHEDULER_IMAGE ?= fluence -.PHONY: all build build-sidecar prepare push push-sidecar push-controller +.PHONY: all build build-sidecar clone prepare push push-sidecar push-controller -all: build-sidecar prepare build +all: build-sidecar prepare build update clone update build-sidecar: make -C ./src LOCAL_REGISTRY=${REGISTRY} LOCAL_IMAGE=${SIDECAR_IMAGE} -prepare: +clone: if [ -d "$(CLONE_UPSTREAM)" ]; then echo "Upstream is cloned"; else git clone $(UPSTREAM) ./$(CLONE_UPSTREAM); fi + +update: clone + git -C $(CLONE_UPSTREAM) pull origin master + +prepare: clone # These are entirely new directory structures cp -R sig-scheduler-plugins/pkg/fluence $(CLONE_UPSTREAM)/pkg/fluence cp -R sig-scheduler-plugins/manifests/fluence $(CLONE_UPSTREAM)/manifests/fluence diff --git a/README.md b/README.md index 0e2b4c9..3aa66e6 100644 --- a/README.md +++ b/README.md @@ -440,7 +440,17 @@ You can see [deploy](#deploy) for instructions on how to do a custom deployment. - [sig-scheduler-plugins](sig-scheduler-plugins): includes assets (manifests and Go files) that are intended to be added to the kubernetes-sigs/scheduler-plugins upstream repository before build - *upstream*: the default name this upstream is cloned to when you do a make build command. -Note that the clone of the repository and copying of files to the correct locations is all automated through the [Makefile](Makefile). This section exists to alert you to where to look for the different assets defined above. +Note that the clone of the repository and copying of files to the correct locations is all automated through the [Makefile](Makefile). 
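Looping back to the `New()` constructor touched in the first patch above: the client it builds follows the usual controller-runtime pattern of assembling a `runtime.Scheme` from the API groups the plugin reads and writes, then creating a client from the config exposed by the scheduler framework handle. Below is a minimal, self-contained sketch of that pattern; the `buildFluenceClient` name, the `main` driver, and the explicit error checks are our own illustration (the patch calls `AddToScheme` without checking the returned errors) and not code taken verbatim from the tree.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	clientscheme "k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/rest"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	sched "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1"
)

// buildFluenceClient sketches the client setup in fluence's New():
// register the built-in types plus the scheduling v1alpha1 group
// (which provides PodGroup) on a scheme, then create a
// controller-runtime client from a rest.Config.
func buildFluenceClient(cfg *rest.Config) (client.Client, error) {
	scheme := runtime.NewScheme()
	if err := clientscheme.AddToScheme(scheme); err != nil {
		return nil, fmt.Errorf("registering client-go types: %w", err)
	}
	if err := v1.AddToScheme(scheme); err != nil {
		return nil, fmt.Errorf("registering core/v1 types: %w", err)
	}
	if err := sched.AddToScheme(scheme); err != nil {
		return nil, fmt.Errorf("registering scheduling v1alpha1 types: %w", err)
	}
	return client.New(cfg, client.Options{Scheme: scheme})
}

func main() {
	// In the plugin this config comes from handle.KubeConfig();
	// here we load it the standard controller-runtime way.
	cfg, err := ctrl.GetConfig()
	if err != nil {
		panic(err)
	}
	if _, err := buildFluenceClient(cfg); err != nil {
		panic(err)
	}
	fmt.Println("client created")
}
```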
Additional commands provided include the following: + +```bash +# Only clone the repository into ./upstream +make clone + +# Update the cloned upstream with a git pull origin master +make update +``` + +It's recommend to update once in a while if you have an older clone locally and there might be changes you are not accounting for. ## Papers From 242d169f9785dfdf78306e1fa286896a5103c436 Mon Sep 17 00:00:00 2001 From: vsoch Date: Mon, 8 Jan 2024 18:18:53 -0700 Subject: [PATCH 03/28] fix: restore pod group Signed-off-by: vsoch There are too many edge cases / too much complexity and behavior that I do not understand to pursue having the pod group information cached with fluence. For now I am nuking it and testing the intial design as a sanity check. --- .gitignore | 3 + Makefile | 10 +- README.md | 111 ++- examples/indexed-jobs/job1.yaml | 21 + examples/indexed-jobs/job2.yaml | 21 + .../pkg/fluence/core/core.go | 16 - sig-scheduler-plugins/pkg/fluence/fluence.go | 138 +-- .../pkg/fluence/fluxcli-grpc/fluxcli.pb.go | 838 ------------------ .../pkg/fluence/fluxcli-grpc/fluxcli.proto | 76 -- .../fluence/fluxcli-grpc/fluxcli_grpc.pb.go | 139 --- .../pkg/fluence/utils/utils.go | 93 +- src/Makefile | 15 +- src/build/scheduler/Dockerfile | 1 + src/fluence/fluxcli-grpc/fluxcli.pb.go | 4 +- src/fluence/fluxcli-grpc/fluxcli.proto | 2 +- src/fluence/fluxcli-grpc/fluxcli_grpc.pb.go | 4 + src/fluence/fluxion/fluxion.go | 56 +- src/fluence/go.mod | 29 +- src/fluence/jgf/jgf.go | 221 ++--- src/fluence/jgf/types.go | 62 ++ src/fluence/jobspec/jobspec.go | 238 +++-- src/fluence/jobspec/types.go | 1 + src/fluence/utils/utils.go | 52 +- 23 files changed, 695 insertions(+), 1456 deletions(-) create mode 100644 examples/indexed-jobs/job1.yaml create mode 100644 examples/indexed-jobs/job2.yaml delete mode 100644 sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli.pb.go delete mode 100644 sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli.proto delete mode 100644 sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli_grpc.pb.go create mode 100644 src/fluence/jgf/types.go diff --git a/.gitignore b/.gitignore index fa1845c..51462a8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ plugins upstream scheduler-plugins +sig-scheduler-plugins/pkg/fluence/bin/ +src/bin +src/fluence/vendor \ No newline at end of file diff --git a/Makefile b/Makefile index e20f706..907f96e 100644 --- a/Makefile +++ b/Makefile @@ -10,9 +10,9 @@ SIDECAR_IMAGE ?= fluence-sidecar:latest CONTROLLER_IMAGE ?= fluence-controller SCHEDULER_IMAGE ?= fluence -.PHONY: all build build-sidecar clone prepare push push-sidecar push-controller +.PHONY: all build build-sidecar clone update push push-sidecar push-controller -all: build-sidecar prepare build update clone update +all: prepare build-sidecar build build-sidecar: make -C ./src LOCAL_REGISTRY=${REGISTRY} LOCAL_IMAGE=${SIDECAR_IMAGE} @@ -25,14 +25,18 @@ update: clone prepare: clone # These are entirely new directory structures + rm -rf $(CLONE_UPSTREAM)/pkg/fluence + rm -rf $(CLONE_UPSTREAM)/manifests/fluence cp -R sig-scheduler-plugins/pkg/fluence $(CLONE_UPSTREAM)/pkg/fluence cp -R sig-scheduler-plugins/manifests/fluence $(CLONE_UPSTREAM)/manifests/fluence + # This is the one exception not from sig-scheduler-plugins because it is needed in both spots + cp -R src/fluence/fluxcli-grpc $(CLONE_UPSTREAM)/pkg/fluence/fluxcli-grpc # These are files with subtle changes to add fluence cp sig-scheduler-plugins/cmd/scheduler/main.go ./upstream/cmd/scheduler/main.go cp 
sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/values.yaml -build: +build: prepare REGISTRY=${REGISTRY} IMAGE=${SCHEDULER_IMAGE} CONTROLLER_IMAGE=${CONTROLLER_IMAGE} $(BASH) $(CLONE_UPSTREAM)/hack/build-images.sh push-sidecar: diff --git a/README.md b/README.md index 3aa66e6..4431050 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,48 @@ Fluence enables HPC-grade pod scheduling in Kubernetes via the [Kubernetes Sched For instructions on how to start Fluence on a K8s cluster, see [examples](examples/). Documentation and instructions for reproducing our CANOPIE2022 paper (citation below) can be found in the [canopie22-artifacts branch](https://github.com/flux-framework/flux-k8s/tree/canopie22-artifacts). For background on the Flux framework and the Fluxion scheduler, you can take a look at our award-winning R&D100 submission: https://ipo.llnl.gov/sites/default/files/2022-02/Flux_RD100_Final.pdf. For next steps: + - To understand how it works, see [Design](#design) - To deploy our pre-built images, go to [Deploy](#deploy) - To build your own images, go to [Setup](#setup) - To learn about repository organization, see [Developer](#developer) +### Design + +Fluence is a custom scheduler plugin that you can specify to use with two directive in your pod spec - + +- Asking for `fluence` as the scheduler name +- Defining a named group of pods with the `fluence.flux-framework.org/pod-group` label. +- Defining the group size with the `fluence.flux-framework.org/group-size` label. + +If you are using Fluence, these values are required. +An example is shown below for an indexed job, which will create multiple pods. + +```yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: fluence-job + annotations: + fluence.flux-framework.org/pod-group: my-pods + fluence.flux-framework.org/group-size: 10 +spec: + completions: 10 + parallelism: 10 + completionMode: Indexed + template: + spec: + schedulerName: fluence + containers: + - name: fluence-job + image: busybox + command: [echo, potato] + restartPolicy: Never + backoffLimit: 4 +``` + +The group size might be different than, for example, your higher level abstraction (e.g., the IndexedJob) as there is no reason +pods with different names cannot be part of the same group that needs to be scheduled together. + ### Deploy We provide a set of pre-build containers [alongside the repository](https://github.com/orgs/flux-framework/packages?repo_name=flux-k8s) @@ -434,10 +472,17 @@ Finally, note that we also have a more appropriate example with jobs under [exam ### Developer -You can see [deploy](#deploy) for instructions on how to do a custom deployment. If you are looking to develop: +You can see [deploy](#deploy) for instructions on how to do a custom deployment. + +#### Organization + +If you are looking to develop: - - [src](src): includes source code for fluence - - [sig-scheduler-plugins](sig-scheduler-plugins): includes assets (manifests and Go files) that are intended to be added to the kubernetes-sigs/scheduler-plugins upstream repository before build + - [src](src): includes source code for fluence. You'll find logs for this code in the `sidecar` container of the fluence pod. 
+ - [sig-scheduler-plugins](sig-scheduler-plugins): includes assets (manifests and Go files) that are intended to be added to the kubernetes-sigs/scheduler-plugins upstream repository before build. You'll find logs for this container in the `scheduler-plugins-scheduler` container of the pod. + - [manifests](sig-scheduler-plugins/manifests): manifests for helm and Kubernetes + - [pkg](sig-scheduler-plugins/pkg): the main fluence module to add to upstream + - [cmd](sig-scheduler-plugins/cmd): the main.go to replace in upstream - *upstream*: the default name this upstream is cloned to when you do a make build command. Note that the clone of the repository and copying of files to the correct locations is all automated through the [Makefile](Makefile). Additional commands provided include the following: @@ -452,6 +497,66 @@ make update It's recommend to update once in a while if you have an older clone locally and there might be changes you are not accounting for. +#### GRPC + +The fluence module uses GRPC to communicate with Flux, and these assets are stored in [src/fluence/fluxcli-grpc](src/fluence/fluxcli-grpc). +You should *only* update the [sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli.proto](src/fluence/fluxcli-grpc/fluxcli.proto) file, +and then from the root run `make proto` to re-generate the other files: + +```bash +cd src + +# Install protoc tools to local bin +# make protoc +make proto +``` + +#### Workflow + +The easiest thing to do is to build the containers in some container namespace that you control (meaning you can push to a registry), e.g.,: + +```bash +make build REGISTRY=ghcr.io/vsoch +``` + +And then install with your custom images: + +``` +cd ./upstream/manifests/install/charts +helm install \ + --set scheduler.image=ghcr.io/vsoch/fluence:latest \ + --set scheduler.sidecarimage=ghcr.io/vsoch/fluence-sidecar:latest \ + schedscheduler-plugins as-a-second-scheduler/ +``` + +And then apply what you need to test, and look at logs! +And then keep doing that until you get what you want :) Note that I haven't found a good way for the VSCode developer tools to work because we develop fluence outside of the tree it's supposed to be in. + +#### Components + + - [FluxStateData](sig-scheduler-plugins/pkg/fluence/core/core.go): is given to the [framework.CycleState](https://github.com/kubernetes/kubernetes/blob/242b41b36a20032f99e8a059ca0a5d764105217b/pkg/scheduler/framework/cycle_state.go#L48) and serves as a vehicle to store a cache of node name assignment. + + +#### Helm + +The install commands are shown above, but often you want to uninstall! + +> What is the name of the installed plugin again? 
+ +```bash + helm list +NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION +schedscheduler-plugins default 1 2024-01-08 12:04:58.558612156 -0700 MST deployed scheduler-plugins-0.27.80.27.8 +``` + +And then uninstall: + +```bash +$ helm uninstall schedscheduler-plugins +release "schedscheduler-plugins" uninstalled +``` + + ## Papers You can find details of Fluence architecture, implementation, experiments, and improvements to the Kubeflow MPI operator in our collaboration's papers: diff --git a/examples/indexed-jobs/job1.yaml b/examples/indexed-jobs/job1.yaml new file mode 100644 index 0000000..5778bc3 --- /dev/null +++ b/examples/indexed-jobs/job1.yaml @@ -0,0 +1,21 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: job-1 +spec: + completions: 10 + parallelism: 10 + completionMode: Indexed + template: + metadata: + labels: + fluence.pod-group: job-1 + fluence.group-size: "5" + spec: + schedulerName: fluence + containers: + - name: fluence-job + image: busybox + command: [sleep, "10"] + restartPolicy: Never + backoffLimit: 4 diff --git a/examples/indexed-jobs/job2.yaml b/examples/indexed-jobs/job2.yaml new file mode 100644 index 0000000..3d77660 --- /dev/null +++ b/examples/indexed-jobs/job2.yaml @@ -0,0 +1,21 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: job-2 +spec: + completions: 5 + parallelism: 5 + completionMode: Indexed + template: + metadata: + labels: + fluence.pod-group: job-2 + fluence.group-size: "5" + spec: + schedulerName: fluence + containers: + - name: fluence-job + image: busybox + command: [sleep, "10"] + restartPolicy: Never + backoffLimit: 4 diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index 11c90ef..5914441 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -1,19 +1,3 @@ -/* -Copyright 2022 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - package core import ( diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index a705e2c..32fd513 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -36,8 +36,9 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework" "k8s.io/kubernetes/pkg/scheduler/metrics" + corelisters "k8s.io/client-go/listers/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" + sched "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" coschedulingcore "sigs.k8s.io/scheduler-plugins/pkg/coscheduling/core" fcore "sigs.k8s.io/scheduler-plugins/pkg/fluence/core" pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" @@ -47,27 +48,38 @@ import ( type Fluence struct { mutex sync.Mutex handle framework.Handle + client client.Client podNameToJobId map[string]uint64 pgMgr coschedulingcore.Manager -} -var _ framework.QueueSortPlugin = &Fluence{} -var _ framework.PreFilterPlugin = &Fluence{} -var _ framework.FilterPlugin = &Fluence{} + // The pod group manager has a lister, but it's private + podLister corelisters.PodLister +} // Name is the name of the plugin used in the Registry and configurations. -const Name = "Fluence" +// Note that this would do better as an annotation (fluence.flux-framework.org/pod-group) +// But we cannot use them as selectors then! +const ( + Name = "Fluence" +) + +var ( + _ framework.QueueSortPlugin = &Fluence{} + _ framework.PreFilterPlugin = &Fluence{} + _ framework.FilterPlugin = &Fluence{} +) func (f *Fluence) Name() string { return Name } // Initialize and return a new Fluence Custom Scheduler Plugin -// Note from vsoch: seems analogous to: +// This class and functions are analogous to: // https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/pkg/coscheduling/coscheduling.go#L63 func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { f := &Fluence{handle: handle, podNameToJobId: make(map[string]uint64)} + klog.Info("Create plugin") ctx := context.TODO() fcore.Init() @@ -84,32 +96,38 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { scheme := runtime.NewScheme() clientscheme.AddToScheme(scheme) v1.AddToScheme(scheme) - v1alpha1.AddToScheme(scheme) - client, err := client.New(handle.KubeConfig(), client.Options{Scheme: scheme}) + sched.AddToScheme(scheme) + k8scli, err := client.New(handle.KubeConfig(), client.Options{Scheme: scheme}) if err != nil { return nil, err } + // Save the kubernetes client for fluence to interact with cluster objects + f.client = k8scli + fieldSelector, err := fields.ParseSelector(",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)) if err != nil { klog.ErrorS(err, "ParseSelector failed") os.Exit(1) } + informerFactory := informers.NewSharedInformerFactoryWithOptions(handle.ClientSet(), 0, informers.WithTweakListOptions(func(opt *metav1.ListOptions) { opt.FieldSelector = fieldSelector.String() })) podInformer := informerFactory.Core().V1().Pods() - scheduleTimeDuration := time.Duration(500) * time.Second pgMgr := coschedulingcore.NewPodGroupManager( - client, + k8scli, handle.SnapshotSharedLister(), &scheduleTimeDuration, podInformer, ) f.pgMgr = pgMgr + // Save the podLister to fluence to easily query for the group + f.podLister = podInformer.Lister() + // stopCh := make(chan struct{}) // defer close(stopCh) // informerFactory.Start(stopCh) @@ -127,8 +145,9 @@ func New(_ 
runtime.Object, handle framework.Handle) (framework.Plugin, error) { // Less is used to sort pods in the scheduling queue in the following order. // 1. Compare the priorities of Pods. -// 2. Compare the initialization timestamps of PodGroups or Pods. -// 3. Compare the keys of PodGroups/Pods: /. +// 2. Compare the initialization timestamps of fluence pod groups +// 3. Fall back, sort by namespace/name +// See https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { klog.Infof("ordering pods from Coscheduling") prio1 := corev1helpers.PodPriority(podInfo1.Pod) @@ -144,59 +163,72 @@ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { return creationTime1.Before(creationTime2) } -func (f *Fluence) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) { +// getPodGroup gets the group information from the pod group manager +// to determine if a pod is in a group. We return the group +func (f *Fluence) getPodGroup(ctx context.Context, pod *v1.Pod) (string, *sched.PodGroup) { + pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) + if pg == nil { + klog.InfoS("Not in group", "pod", klog.KObj(pod)) + } + return pgName, pg +} + +// PreFilter checks info about the Pod / checks conditions that the cluster or the Pod must meet. +// This still comes after sort +func (f *Fluence) PreFilter( + ctx context.Context, + state *framework.CycleState, + pod *v1.Pod, +) (*framework.PreFilterResult, *framework.Status) { + + var ( + err error + nodename string + ) klog.Infof("Examining the pod") - var err error - var nodename string - if pgname, ok := f.isGroup(ctx, pod); ok { - if !fcore.HaveList(pgname) { + + // Get the pod group name and group + groupName, pg := f.getPodGroup(ctx, pod) + klog.Infof("group name is %s", groupName) + + // Case 1: We have a pod group + if pg != nil { + + // We have not yet derived a node list + if !fcore.HaveList(groupName) { klog.Infof("Getting a pod group") - groupSize, _ := f.groupPreFilter(ctx, pod) - if _, err = f.AskFlux(ctx, pod, groupSize); err != nil { + if _, err = f.AskFlux(ctx, pod, int(pg.Spec.MinMember)); err != nil { return nil, framework.NewStatus(framework.Unschedulable, err.Error()) } } - nodename, err = fcore.GetNextNode(pgname) - klog.Infof("Node Selected %s (%s:%s)", nodename, pod.Name, pgname) + nodename, err = fcore.GetNextNode(groupName) + klog.Infof("Node Selected %s (%s:%s)", nodename, pod.Name, groupName) if err != nil { return nil, framework.NewStatus(framework.Unschedulable, err.Error()) } + } else { + + // Case 2: no group, a faux group of a lonely 1 :( nodename, err = f.AskFlux(ctx, pod, 1) if err != nil { return nil, framework.NewStatus(framework.Unschedulable, err.Error()) } } + // Create a fluxState (CycleState) with things that might be useful/ klog.Info("Node Selected: ", nodename) state.Write(framework.StateKey(pod.Name), &fcore.FluxStateData{NodeName: nodename}) return nil, framework.NewStatus(framework.Success, "") - -} - -func (f *Fluence) isGroup(ctx context.Context, pod *v1.Pod) (string, bool) { - pgFullName, pg := f.pgMgr.GetPodGroup(ctx, pod) - if pg == nil { - klog.InfoS("Not in group", "pod", klog.KObj(pod)) - return "", false - } - return pgFullName, true } -func (f *Fluence) groupPreFilter(ctx context.Context, pod *v1.Pod) (int, error) { - // klog.InfoS("Flux Pre-Filter", "pod", klog.KObj(pod)) - klog.InfoS("Flux Pre-Filter", "pod labels", pod.Labels) - _, pg := 
f.pgMgr.GetPodGroup(ctx, pod) - if pg == nil { - klog.InfoS("Not in group", "pod", klog.KObj(pod)) - return 0, nil - } - - klog.Info("pod group members ", pg.Spec.MinMember) - return int(pg.Spec.MinMember), nil -} +func (f *Fluence) Filter( + ctx context.Context, + cycleState *framework.CycleState, + pod *v1.Pod, + nodeInfo *framework.NodeInfo, +) *framework.Status { -func (f *Fluence) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { klog.Info("Filtering input node ", nodeInfo.Node().Name) if v, e := cycleState.Read(framework.StateKey(pod.Name)); e == nil { if value, ok := v.(*fcore.FluxStateData); ok && value.NodeName != nodeInfo.Node().Name { @@ -205,14 +237,16 @@ func (f *Fluence) Filter(ctx context.Context, cycleState *framework.CycleState, klog.Info("Filter: node selected by Flux ", value.NodeName) } } - return framework.NewStatus(framework.Success) } +// PreFilterExtensions allow for callbacks on filtered states +// https://github.com/kubernetes/kubernetes/blob/master/pkg/scheduler/framework/interface.go#L383 func (f *Fluence) PreFilterExtensions() framework.PreFilterExtensions { return nil } +// AskFlux will ask flux for an allocation for nodes for the pod group. func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) (string, error) { // clean up previous match if a pod has already allocated previously f.mutex.Lock() @@ -252,8 +286,9 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) (string, klog.Infof("[FluxClient] response podID %s", r.GetPodID()) - _, ok := f.isGroup(ctx, pod) - if count > 1 || ok { + _, pg := f.getPodGroup(ctx, pod) + + if count > 1 || pg != nil { pgFullName, _ := f.pgMgr.GetPodGroup(ctx, pod) nodelist := fcore.CreateNodePodsList(r.GetNodelist(), pgFullName) klog.Infof("[FluxClient] response nodeID %s", r.GetNodelist()) @@ -279,6 +314,7 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) (string, return "", nil } +// cancelFluxJobForPod cancels the flux job for a pod. func (f *Fluence) cancelFluxJobForPod(podName string) error { jobid := f.podNameToJobId[podName] @@ -325,11 +361,13 @@ func (f *Fluence) cancelFluxJobForPod(podName string) error { return nil } -// EventHandlers +// EventHandlers updatePod handles cleaning up resources func (f *Fluence) updatePod(oldObj, newObj interface{}) { // klog.Info("Update Pod event handler") newPod := newObj.(*v1.Pod) - klog.Infof("Processing event for pod %s", newPod) + + klog.Infof("Processing event for pod %s", newPod.Name) + switch newPod.Status.Phase { case v1.PodPending: // in this state we don't know if a pod is going to be running, thus we don't need to update job map diff --git a/sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli.pb.go b/sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli.pb.go deleted file mode 100644 index e317af2..0000000 --- a/sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli.pb.go +++ /dev/null @@ -1,838 +0,0 @@ -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.26.0 -// protoc v3.15.8 -// source: fluence/fluxcli-grpc/fluxcli.proto - -package fluxcli - -import ( - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. 
- _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -type PodSpec struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` - Container string `protobuf:"bytes,2,opt,name=container,proto3" json:"container,omitempty"` - Cpu int32 `protobuf:"varint,3,opt,name=cpu,proto3" json:"cpu,omitempty"` - Memory int64 `protobuf:"varint,4,opt,name=memory,proto3" json:"memory,omitempty"` - Gpu int64 `protobuf:"varint,5,opt,name=gpu,proto3" json:"gpu,omitempty"` - Storage int64 `protobuf:"varint,6,opt,name=storage,proto3" json:"storage,omitempty"` - Labels []string `protobuf:"bytes,7,rep,name=labels,proto3" json:"labels,omitempty"` -} - -func (x *PodSpec) Reset() { - *x = PodSpec{} - if protoimpl.UnsafeEnabled { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *PodSpec) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*PodSpec) ProtoMessage() {} - -func (x *PodSpec) ProtoReflect() protoreflect.Message { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use PodSpec.ProtoReflect.Descriptor instead. -func (*PodSpec) Descriptor() ([]byte, []int) { - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP(), []int{0} -} - -func (x *PodSpec) GetId() string { - if x != nil { - return x.Id - } - return "" -} - -func (x *PodSpec) GetContainer() string { - if x != nil { - return x.Container - } - return "" -} - -func (x *PodSpec) GetCpu() int32 { - if x != nil { - return x.Cpu - } - return 0 -} - -func (x *PodSpec) GetMemory() int64 { - if x != nil { - return x.Memory - } - return 0 -} - -func (x *PodSpec) GetGpu() int64 { - if x != nil { - return x.Gpu - } - return 0 -} - -func (x *PodSpec) GetStorage() int64 { - if x != nil { - return x.Storage - } - return 0 -} - -func (x *PodSpec) GetLabels() []string { - if x != nil { - return x.Labels - } - return nil -} - -// The Match request message (allocate, allocate_orelse_reserve) -type MatchRequest struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Ps *PodSpec `protobuf:"bytes,1,opt,name=ps,proto3" json:"ps,omitempty"` - Request string `protobuf:"bytes,2,opt,name=request,proto3" json:"request,omitempty"` - Count int32 `protobuf:"varint,3,opt,name=count,proto3" json:"count,omitempty"` -} - -func (x *MatchRequest) Reset() { - *x = MatchRequest{} - if protoimpl.UnsafeEnabled { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *MatchRequest) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*MatchRequest) ProtoMessage() {} - -func (x *MatchRequest) ProtoReflect() protoreflect.Message { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use MatchRequest.ProtoReflect.Descriptor instead. 
-func (*MatchRequest) Descriptor() ([]byte, []int) { - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP(), []int{1} -} - -func (x *MatchRequest) GetPs() *PodSpec { - if x != nil { - return x.Ps - } - return nil -} - -func (x *MatchRequest) GetRequest() string { - if x != nil { - return x.Request - } - return "" -} - -func (x *MatchRequest) GetCount() int32 { - if x != nil { - return x.Count - } - return 0 -} - -// The Nodes/Cluster Update Status -type NodeAlloc struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - NodeID string `protobuf:"bytes,1,opt,name=nodeID,proto3" json:"nodeID,omitempty"` - Tasks int32 `protobuf:"varint,2,opt,name=tasks,proto3" json:"tasks,omitempty"` -} - -func (x *NodeAlloc) Reset() { - *x = NodeAlloc{} - if protoimpl.UnsafeEnabled { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *NodeAlloc) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*NodeAlloc) ProtoMessage() {} - -func (x *NodeAlloc) ProtoReflect() protoreflect.Message { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use NodeAlloc.ProtoReflect.Descriptor instead. -func (*NodeAlloc) Descriptor() ([]byte, []int) { - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP(), []int{2} -} - -func (x *NodeAlloc) GetNodeID() string { - if x != nil { - return x.NodeID - } - return "" -} - -func (x *NodeAlloc) GetTasks() int32 { - if x != nil { - return x.Tasks - } - return 0 -} - -// The Match response message -type MatchResponse struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - PodID string `protobuf:"bytes,1,opt,name=podID,proto3" json:"podID,omitempty"` - Nodelist []*NodeAlloc `protobuf:"bytes,2,rep,name=nodelist,proto3" json:"nodelist,omitempty"` - JobID int64 `protobuf:"varint,3,opt,name=jobID,proto3" json:"jobID,omitempty"` -} - -func (x *MatchResponse) Reset() { - *x = MatchResponse{} - if protoimpl.UnsafeEnabled { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *MatchResponse) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*MatchResponse) ProtoMessage() {} - -func (x *MatchResponse) ProtoReflect() protoreflect.Message { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[3] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use MatchResponse.ProtoReflect.Descriptor instead. 
-func (*MatchResponse) Descriptor() ([]byte, []int) { - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP(), []int{3} -} - -func (x *MatchResponse) GetPodID() string { - if x != nil { - return x.PodID - } - return "" -} - -func (x *MatchResponse) GetNodelist() []*NodeAlloc { - if x != nil { - return x.Nodelist - } - return nil -} - -func (x *MatchResponse) GetJobID() int64 { - if x != nil { - return x.JobID - } - return 0 -} - -type CancelRequest struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - JobID int64 `protobuf:"varint,2,opt,name=jobID,proto3" json:"jobID,omitempty"` -} - -func (x *CancelRequest) Reset() { - *x = CancelRequest{} - if protoimpl.UnsafeEnabled { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *CancelRequest) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*CancelRequest) ProtoMessage() {} - -func (x *CancelRequest) ProtoReflect() protoreflect.Message { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[4] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use CancelRequest.ProtoReflect.Descriptor instead. -func (*CancelRequest) Descriptor() ([]byte, []int) { - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP(), []int{4} -} - -func (x *CancelRequest) GetJobID() int64 { - if x != nil { - return x.JobID - } - return 0 -} - -// The Match response message -type CancelResponse struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - JobID int64 `protobuf:"varint,1,opt,name=jobID,proto3" json:"jobID,omitempty"` - Error int32 `protobuf:"varint,2,opt,name=error,proto3" json:"error,omitempty"` -} - -func (x *CancelResponse) Reset() { - *x = CancelResponse{} - if protoimpl.UnsafeEnabled { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[5] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *CancelResponse) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*CancelResponse) ProtoMessage() {} - -func (x *CancelResponse) ProtoReflect() protoreflect.Message { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[5] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use CancelResponse.ProtoReflect.Descriptor instead. 
-func (*CancelResponse) Descriptor() ([]byte, []int) { - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP(), []int{5} -} - -func (x *CancelResponse) GetJobID() int64 { - if x != nil { - return x.JobID - } - return 0 -} - -func (x *CancelResponse) GetError() int32 { - if x != nil { - return x.Error - } - return 0 -} - -// The Nodes/Cluster Update Status -type NodeStatus struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - CpuAvail int32 `protobuf:"varint,1,opt,name=cpuAvail,proto3" json:"cpuAvail,omitempty"` - GpuAvail int32 `protobuf:"varint,2,opt,name=gpuAvail,proto3" json:"gpuAvail,omitempty"` - StorageAvail int64 `protobuf:"varint,3,opt,name=storageAvail,proto3" json:"storageAvail,omitempty"` - MemoryAvail int64 `protobuf:"varint,4,opt,name=memoryAvail,proto3" json:"memoryAvail,omitempty"` - AllowedPods int64 `protobuf:"varint,5,opt,name=allowedPods,proto3" json:"allowedPods,omitempty"` - NodeIP string `protobuf:"bytes,6,opt,name=nodeIP,proto3" json:"nodeIP,omitempty"` - Replication int32 `protobuf:"varint,7,opt,name=replication,proto3" json:"replication,omitempty"` -} - -func (x *NodeStatus) Reset() { - *x = NodeStatus{} - if protoimpl.UnsafeEnabled { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[6] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *NodeStatus) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*NodeStatus) ProtoMessage() {} - -func (x *NodeStatus) ProtoReflect() protoreflect.Message { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[6] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use NodeStatus.ProtoReflect.Descriptor instead. 
-func (*NodeStatus) Descriptor() ([]byte, []int) { - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP(), []int{6} -} - -func (x *NodeStatus) GetCpuAvail() int32 { - if x != nil { - return x.CpuAvail - } - return 0 -} - -func (x *NodeStatus) GetGpuAvail() int32 { - if x != nil { - return x.GpuAvail - } - return 0 -} - -func (x *NodeStatus) GetStorageAvail() int64 { - if x != nil { - return x.StorageAvail - } - return 0 -} - -func (x *NodeStatus) GetMemoryAvail() int64 { - if x != nil { - return x.MemoryAvail - } - return 0 -} - -func (x *NodeStatus) GetAllowedPods() int64 { - if x != nil { - return x.AllowedPods - } - return 0 -} - -func (x *NodeStatus) GetNodeIP() string { - if x != nil { - return x.NodeIP - } - return "" -} - -func (x *NodeStatus) GetReplication() int32 { - if x != nil { - return x.Replication - } - return 0 -} - -// The JGF response message -type JGFRequest struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Jgf string `protobuf:"bytes,1,opt,name=jgf,proto3" json:"jgf,omitempty"` -} - -func (x *JGFRequest) Reset() { - *x = JGFRequest{} - if protoimpl.UnsafeEnabled { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[7] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *JGFRequest) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*JGFRequest) ProtoMessage() {} - -func (x *JGFRequest) ProtoReflect() protoreflect.Message { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[7] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use JGFRequest.ProtoReflect.Descriptor instead. -func (*JGFRequest) Descriptor() ([]byte, []int) { - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP(), []int{7} -} - -func (x *JGFRequest) GetJgf() string { - if x != nil { - return x.Jgf - } - return "" -} - -// The JGF response message -type JGFResponse struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Jgf string `protobuf:"bytes,1,opt,name=jgf,proto3" json:"jgf,omitempty"` -} - -func (x *JGFResponse) Reset() { - *x = JGFResponse{} - if protoimpl.UnsafeEnabled { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[8] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *JGFResponse) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*JGFResponse) ProtoMessage() {} - -func (x *JGFResponse) ProtoReflect() protoreflect.Message { - mi := &file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[8] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use JGFResponse.ProtoReflect.Descriptor instead. 
-func (*JGFResponse) Descriptor() ([]byte, []int) { - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP(), []int{8} -} - -func (x *JGFResponse) GetJgf() string { - if x != nil { - return x.Jgf - } - return "" -} - -var File_fluence_fluxcli_grpc_fluxcli_proto protoreflect.FileDescriptor - -var file_fluence_fluxcli_grpc_fluxcli_proto_rawDesc = []byte{ - 0x0a, 0x22, 0x66, 0x6c, 0x75, 0x65, 0x6e, 0x63, 0x65, 0x2f, 0x66, 0x6c, 0x75, 0x78, 0x63, 0x6c, - 0x69, 0x2d, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x66, 0x6c, 0x75, 0x78, 0x63, 0x6c, 0x69, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x07, 0x66, 0x6c, 0x75, 0x78, 0x63, 0x6c, 0x69, 0x22, 0xa5, 0x01, - 0x0a, 0x07, 0x50, 0x6f, 0x64, 0x53, 0x70, 0x65, 0x63, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1c, 0x0a, 0x09, 0x63, 0x6f, 0x6e, - 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6f, - 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x12, 0x10, 0x0a, 0x03, 0x63, 0x70, 0x75, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x63, 0x70, 0x75, 0x12, 0x16, 0x0a, 0x06, 0x6d, 0x65, 0x6d, - 0x6f, 0x72, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, - 0x79, 0x12, 0x10, 0x0a, 0x03, 0x67, 0x70, 0x75, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, - 0x67, 0x70, 0x75, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x18, 0x06, - 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x12, 0x16, 0x0a, - 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x6c, - 0x61, 0x62, 0x65, 0x6c, 0x73, 0x22, 0x60, 0x0a, 0x0c, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x20, 0x0a, 0x02, 0x70, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x10, 0x2e, 0x66, 0x6c, 0x75, 0x78, 0x63, 0x6c, 0x69, 0x2e, 0x50, 0x6f, 0x64, 0x53, - 0x70, 0x65, 0x63, 0x52, 0x02, 0x70, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x72, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x12, 0x14, 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0x39, 0x0a, 0x09, 0x4e, 0x6f, 0x64, 0x65, 0x41, - 0x6c, 0x6c, 0x6f, 0x63, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x6f, 0x64, 0x65, 0x49, 0x44, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6e, 0x6f, 0x64, 0x65, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, - 0x74, 0x61, 0x73, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x74, 0x61, 0x73, - 0x6b, 0x73, 0x22, 0x6b, 0x0a, 0x0d, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x70, 0x6f, 0x64, 0x49, 0x44, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x70, 0x6f, 0x64, 0x49, 0x44, 0x12, 0x2e, 0x0a, 0x08, 0x6e, 0x6f, 0x64, - 0x65, 0x6c, 0x69, 0x73, 0x74, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x66, 0x6c, - 0x75, 0x78, 0x63, 0x6c, 0x69, 0x2e, 0x4e, 0x6f, 0x64, 0x65, 0x41, 0x6c, 0x6c, 0x6f, 0x63, 0x52, - 0x08, 0x6e, 0x6f, 0x64, 0x65, 0x6c, 0x69, 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, - 0x49, 0x44, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x49, 0x44, 0x22, - 0x25, 0x0a, 0x0d, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, - 0x05, 0x6a, 0x6f, 0x62, 0x49, 0x44, 0x22, 0x3c, 0x0a, 
0x0e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x6a, 0x6f, 0x62, 0x49, - 0x44, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x6a, 0x6f, 0x62, 0x49, 0x44, 0x12, 0x14, - 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x22, 0xe6, 0x01, 0x0a, 0x0a, 0x4e, 0x6f, 0x64, 0x65, 0x53, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x12, 0x1a, 0x0a, 0x08, 0x63, 0x70, 0x75, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x63, 0x70, 0x75, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x12, - 0x1a, 0x0a, 0x08, 0x67, 0x70, 0x75, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x08, 0x67, 0x70, 0x75, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x12, 0x22, 0x0a, 0x0c, 0x73, - 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x0c, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x12, - 0x20, 0x0a, 0x0b, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x18, 0x04, - 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x41, 0x76, 0x61, 0x69, - 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x65, 0x64, 0x50, 0x6f, 0x64, 0x73, - 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x65, 0x64, 0x50, - 0x6f, 0x64, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x6f, 0x64, 0x65, 0x49, 0x50, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x06, 0x6e, 0x6f, 0x64, 0x65, 0x49, 0x50, 0x12, 0x20, 0x0a, 0x0b, 0x72, - 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, - 0x52, 0x0b, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x1e, 0x0a, - 0x0a, 0x4a, 0x47, 0x46, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x6a, - 0x67, 0x66, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6a, 0x67, 0x66, 0x22, 0x1f, 0x0a, - 0x0b, 0x4a, 0x47, 0x46, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x10, 0x0a, 0x03, - 0x6a, 0x67, 0x66, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6a, 0x67, 0x66, 0x32, 0x87, - 0x01, 0x0a, 0x0e, 0x46, 0x6c, 0x75, 0x78, 0x63, 0x6c, 0x69, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x12, 0x38, 0x0a, 0x05, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x12, 0x15, 0x2e, 0x66, 0x6c, 0x75, - 0x78, 0x63, 0x6c, 0x69, 0x2e, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x16, 0x2e, 0x66, 0x6c, 0x75, 0x78, 0x63, 0x6c, 0x69, 0x2e, 0x4d, 0x61, 0x74, 0x63, - 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x43, - 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x12, 0x16, 0x2e, 0x66, 0x6c, 0x75, 0x78, 0x63, 0x6c, 0x69, 0x2e, - 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x17, 0x2e, - 0x66, 0x6c, 0x75, 0x78, 0x63, 0x6c, 0x69, 0x2e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x0e, 0x5a, 0x0c, 0x67, 0x72, 0x70, 0x63, - 0x2f, 0x66, 0x6c, 0x75, 0x78, 0x63, 0x6c, 0x69, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, -} - -var ( - file_fluence_fluxcli_grpc_fluxcli_proto_rawDescOnce sync.Once - file_fluence_fluxcli_grpc_fluxcli_proto_rawDescData = file_fluence_fluxcli_grpc_fluxcli_proto_rawDesc -) - -func file_fluence_fluxcli_grpc_fluxcli_proto_rawDescGZIP() []byte { - file_fluence_fluxcli_grpc_fluxcli_proto_rawDescOnce.Do(func() { - file_fluence_fluxcli_grpc_fluxcli_proto_rawDescData = 
protoimpl.X.CompressGZIP(file_fluence_fluxcli_grpc_fluxcli_proto_rawDescData) - }) - return file_fluence_fluxcli_grpc_fluxcli_proto_rawDescData -} - -var file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes = make([]protoimpl.MessageInfo, 9) -var file_fluence_fluxcli_grpc_fluxcli_proto_goTypes = []interface{}{ - (*PodSpec)(nil), // 0: fluxcli.PodSpec - (*MatchRequest)(nil), // 1: fluxcli.MatchRequest - (*NodeAlloc)(nil), // 2: fluxcli.NodeAlloc - (*MatchResponse)(nil), // 3: fluxcli.MatchResponse - (*CancelRequest)(nil), // 4: fluxcli.CancelRequest - (*CancelResponse)(nil), // 5: fluxcli.CancelResponse - (*NodeStatus)(nil), // 6: fluxcli.NodeStatus - (*JGFRequest)(nil), // 7: fluxcli.JGFRequest - (*JGFResponse)(nil), // 8: fluxcli.JGFResponse -} -var file_fluence_fluxcli_grpc_fluxcli_proto_depIdxs = []int32{ - 0, // 0: fluxcli.MatchRequest.ps:type_name -> fluxcli.PodSpec - 2, // 1: fluxcli.MatchResponse.nodelist:type_name -> fluxcli.NodeAlloc - 1, // 2: fluxcli.FluxcliService.Match:input_type -> fluxcli.MatchRequest - 4, // 3: fluxcli.FluxcliService.Cancel:input_type -> fluxcli.CancelRequest - 3, // 4: fluxcli.FluxcliService.Match:output_type -> fluxcli.MatchResponse - 5, // 5: fluxcli.FluxcliService.Cancel:output_type -> fluxcli.CancelResponse - 4, // [4:6] is the sub-list for method output_type - 2, // [2:4] is the sub-list for method input_type - 2, // [2:2] is the sub-list for extension type_name - 2, // [2:2] is the sub-list for extension extendee - 0, // [0:2] is the sub-list for field type_name -} - -func init() { file_fluence_fluxcli_grpc_fluxcli_proto_init() } -func file_fluence_fluxcli_grpc_fluxcli_proto_init() { - if File_fluence_fluxcli_grpc_fluxcli_proto != nil { - return - } - if !protoimpl.UnsafeEnabled { - file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PodSpec); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MatchRequest); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*NodeAlloc); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MatchResponse); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*CancelRequest); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*CancelResponse); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*NodeStatus); i { - case 0: - return &v.state - case 1: - 
return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*JGFRequest); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*JGFResponse); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } - type x struct{} - out := protoimpl.TypeBuilder{ - File: protoimpl.DescBuilder{ - GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_fluence_fluxcli_grpc_fluxcli_proto_rawDesc, - NumEnums: 0, - NumMessages: 9, - NumExtensions: 0, - NumServices: 1, - }, - GoTypes: file_fluence_fluxcli_grpc_fluxcli_proto_goTypes, - DependencyIndexes: file_fluence_fluxcli_grpc_fluxcli_proto_depIdxs, - MessageInfos: file_fluence_fluxcli_grpc_fluxcli_proto_msgTypes, - }.Build() - File_fluence_fluxcli_grpc_fluxcli_proto = out.File - file_fluence_fluxcli_grpc_fluxcli_proto_rawDesc = nil - file_fluence_fluxcli_grpc_fluxcli_proto_goTypes = nil - file_fluence_fluxcli_grpc_fluxcli_proto_depIdxs = nil -} diff --git a/sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli.proto b/sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli.proto deleted file mode 100644 index f47d35b..0000000 --- a/sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli.proto +++ /dev/null @@ -1,76 +0,0 @@ -syntax = "proto3"; -option go_package = "grpc/fluxcli"; - -package fluxcli; - - -// Service definition -service FluxcliService { - // Sends a Match command - rpc Match(MatchRequest) returns (MatchResponse) {} - rpc Cancel(CancelRequest) returns (CancelResponse) {} -} - -message PodSpec { - string id = 1; - string container = 2; - int32 cpu = 3; - int64 memory = 4; - int64 gpu = 5; - int64 storage = 6; - repeated string labels = 7; -} - -// The Match request message (allocate, allocate_orelse_reserve) -message MatchRequest { - PodSpec ps = 1; - string request = 2; - int32 count = 3; -} - -// The Nodes/Cluster Update Status -message NodeAlloc { - string nodeID = 1; - int32 tasks = 2; -} - -// The Match response message -message MatchResponse { - string podID = 1; - repeated NodeAlloc nodelist = 2; - int64 jobID = 3; -} - -message CancelRequest { - int64 jobID = 2; -} - -// The Match response message -message CancelResponse { - int64 jobID = 1; - int32 error = 2; -} - - - -// The Nodes/Cluster Update Status -message NodeStatus { - int32 cpuAvail = 1; - int32 gpuAvail = 2; - int64 storageAvail = 3; - int64 memoryAvail = 4; - int64 allowedPods = 5; - string nodeIP = 6; - int32 replication = 7; -} - -// The JGF response message -message JGFRequest { - string jgf = 1; -} - - -// The JGF response message -message JGFResponse { - string jgf = 1; -} diff --git a/sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli_grpc.pb.go b/sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli_grpc.pb.go deleted file mode 100644 index 7bd905a..0000000 --- a/sig-scheduler-plugins/pkg/fluence/fluxcli-grpc/fluxcli_grpc.pb.go +++ /dev/null @@ -1,139 +0,0 @@ -// Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
- -package fluxcli - -import ( - context "context" - grpc "google.golang.org/grpc" - codes "google.golang.org/grpc/codes" - status "google.golang.org/grpc/status" -) - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.32.0 or later. -const _ = grpc.SupportPackageIsVersion7 - -// FluxcliServiceClient is the client API for FluxcliService service. -// -// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. -type FluxcliServiceClient interface { - // Sends a Match command - Match(ctx context.Context, in *MatchRequest, opts ...grpc.CallOption) (*MatchResponse, error) - Cancel(ctx context.Context, in *CancelRequest, opts ...grpc.CallOption) (*CancelResponse, error) -} - -type fluxcliServiceClient struct { - cc grpc.ClientConnInterface -} - -func NewFluxcliServiceClient(cc grpc.ClientConnInterface) FluxcliServiceClient { - return &fluxcliServiceClient{cc} -} - -func (c *fluxcliServiceClient) Match(ctx context.Context, in *MatchRequest, opts ...grpc.CallOption) (*MatchResponse, error) { - out := new(MatchResponse) - err := c.cc.Invoke(ctx, "/fluxcli.FluxcliService/Match", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -func (c *fluxcliServiceClient) Cancel(ctx context.Context, in *CancelRequest, opts ...grpc.CallOption) (*CancelResponse, error) { - out := new(CancelResponse) - err := c.cc.Invoke(ctx, "/fluxcli.FluxcliService/Cancel", in, out, opts...) - if err != nil { - return nil, err - } - return out, nil -} - -// FluxcliServiceServer is the server API for FluxcliService service. -// All implementations must embed UnimplementedFluxcliServiceServer -// for forward compatibility -type FluxcliServiceServer interface { - // Sends a Match command - Match(context.Context, *MatchRequest) (*MatchResponse, error) - Cancel(context.Context, *CancelRequest) (*CancelResponse, error) - mustEmbedUnimplementedFluxcliServiceServer() -} - -// UnimplementedFluxcliServiceServer must be embedded to have forward compatible implementations. -type UnimplementedFluxcliServiceServer struct { -} - -func (UnimplementedFluxcliServiceServer) Match(context.Context, *MatchRequest) (*MatchResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method Match not implemented") -} -func (UnimplementedFluxcliServiceServer) Cancel(context.Context, *CancelRequest) (*CancelResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method Cancel not implemented") -} -func (UnimplementedFluxcliServiceServer) mustEmbedUnimplementedFluxcliServiceServer() {} - -// UnsafeFluxcliServiceServer may be embedded to opt out of forward compatibility for this service. -// Use of this interface is not recommended, as added methods to FluxcliServiceServer will -// result in compilation errors. 
-type UnsafeFluxcliServiceServer interface { - mustEmbedUnimplementedFluxcliServiceServer() -} - -func RegisterFluxcliServiceServer(s grpc.ServiceRegistrar, srv FluxcliServiceServer) { - s.RegisterService(&FluxcliService_ServiceDesc, srv) -} - -func _FluxcliService_Match_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(MatchRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(FluxcliServiceServer).Match(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/fluxcli.FluxcliService/Match", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(FluxcliServiceServer).Match(ctx, req.(*MatchRequest)) - } - return interceptor(ctx, in, info, handler) -} - -func _FluxcliService_Cancel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(CancelRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(FluxcliServiceServer).Cancel(ctx, in) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/fluxcli.FluxcliService/Cancel", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(FluxcliServiceServer).Cancel(ctx, req.(*CancelRequest)) - } - return interceptor(ctx, in, info, handler) -} - -// FluxcliService_ServiceDesc is the grpc.ServiceDesc for FluxcliService service. -// It's only intended for direct use with grpc.RegisterService, -// and not to be introspected or modified (even as a copy) -var FluxcliService_ServiceDesc = grpc.ServiceDesc{ - ServiceName: "fluxcli.FluxcliService", - HandlerType: (*FluxcliServiceServer)(nil), - Methods: []grpc.MethodDesc{ - { - MethodName: "Match", - Handler: _FluxcliService_Match_Handler, - }, - { - MethodName: "Cancel", - Handler: _FluxcliService_Cancel_Handler, - }, - }, - Streams: []grpc.StreamDesc{}, - Metadata: "fluence/fluxcli-grpc/fluxcli.proto", -} diff --git a/sig-scheduler-plugins/pkg/fluence/utils/utils.go b/sig-scheduler-plugins/pkg/fluence/utils/utils.go index cfb857d..53e9c4a 100644 --- a/sig-scheduler-plugins/pkg/fluence/utils/utils.go +++ b/sig-scheduler-plugins/pkg/fluence/utils/utils.go @@ -17,60 +17,83 @@ limitations under the License. package utils import ( + "fmt" "strings" v1 "k8s.io/api/core/v1" "k8s.io/klog/v2" - "k8s.io/kubernetes/pkg/scheduler/framework" pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" ) -type NoopStateData struct{} +// TODO this package should be renamed something related to a PodSpec Info -func NewNoopStateData() framework.StateData { - return &NoopStateData{} -} - -func (d *NoopStateData) Clone() framework.StateData { - return d +// getPodJobpsecLabels looks across labels and returns those relevant +// to a jobspec +func getPodJobspecLabels(pod *v1.Pod) []string { + labels := []string{} + for label, value := range pod.Labels { + if strings.Contains(label, "jobspec") { + labels = append(labels, value) + } + } + return labels } // InspectPodInfo takes a pod object and returns the pod.spec +// Note from vsoch - I updated this to calculate containers across the pod +// if that's wrong we can change it back. 
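+// As a worked example (illustrative, not taken from the code): a pod with two
+// containers requesting 1 and 2 CPUs, 1Gi and 2Gi of memory, and a single
+// nvidia.com/gpu limit would yield Cpu=3, Memory=3Gi (expressed in bytes), Gpu=1.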
func InspectPodInfo(pod *v1.Pod) *pb.PodSpec { ps := new(pb.PodSpec) ps.Id = pod.Name - cont := pod.Spec.Containers[0] - - //This will need to be done here AND at client level - if len(pod.Labels) > 0 { - r := make([]string, 0) - for key, val := range pod.Labels { - if strings.Contains(key, "jobspec") { - r = append(r, val) - } - } - if len(r) > 0 { - ps.Labels = r - } - } - specRequests := cont.Resources.Requests - specLimits := cont.Resources.Limits + // Note from vsoch - there was an if check here to see if we had labels, + // I don't think there is risk to adding an empty list but we can add + // the check back if there is + ps.Labels = getPodJobspecLabels(pod) + + // Note that Container gets use for the JobSpec, so we provide + // the pod name (to be associated with tasks) for it. We likely + // should change this identifier eventually. + ps.Container = fmt.Sprintf("%s-%s", pod.Namespace, pod.Name) + + // Create accumulated requests for cpu and limits + // CPU and memory are summed across containers + // GPU cannot be shared across containers, but we + // take a count for the pod for the PodSpec + var cpus int32 = 0 + var memory int64 = 0 + var gpus int64 = 0 + + // I think we are OK to sum this too + // https://github.com/kubernetes/kubectl/blob/master/pkg/describe/describe.go#L4211-L4213 + var storage int64 = 0 + + for _, container := range pod.Spec.Containers { + + // Add on Cpu, Memory, GPU from container requests + // This is a limited set of resources owned by the pod + specRequests := container.Resources.Requests + cpus += int32(specRequests.Cpu().Value()) + memory += specRequests.Memory().Value() + storage += specRequests.StorageEphemeral().Value() + + specLimits := container.Resources.Limits + gpuSpec := specLimits["nvidia.com/gpu"] + gpus += gpuSpec.Value() - if specRequests.Cpu().Value() == 0 { - ps.Cpu = 1 - } else { - ps.Cpu = int32(specRequests.Cpu().Value()) } - if specRequests.Memory().Value() > 0 { - ps.Memory = specRequests.Memory().Value() + // If we have zero cpus, assume 1 + // We could use math.Max here, but it is expecting float64 + if cpus == 0 { + cpus = 1 } - gpu := specLimits["nvidia.com/gpu"] - ps.Gpu = gpu.Value() - ps.Storage = specRequests.StorageEphemeral().Value() - - klog.Infof("[Jobspec] Pod spec: CPU %v/%v-milli, memory %v, GPU %v, storage %v", ps.Cpu, specRequests.Cpu().MilliValue(), specRequests.Memory().Value(), ps.Gpu, ps.Storage) + ps.Cpu = cpus + ps.Gpu = gpus + ps.Memory = memory + ps.Storage = storage + // I removed specRequests.Cpu().MilliValue() but we can add back some derivative if desired + klog.Infof("[Jobspec] Pod spec: CPU %v, memory %v, GPU %v, storage %v", ps.Cpu, ps.Memory, ps.Gpu, ps.Storage) return ps } diff --git a/src/Makefile b/src/Makefile index a32efce..344bde1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,6 +1,7 @@ FLUX_SCHED_ROOT ?= /home/flux-sched INSTALL_PREFIX ?= /usr +LOCALBIN ?= $(shell pwd)/bin COMMONENVVAR=GOOS=$(shell uname -s | tr A-Z a-z) # This is what worked @@ -14,6 +15,10 @@ RELEASE_VERSION?=v$(shell date +%Y%m%d)-$(shell git describe --tags --match "v*" .PHONY: all all: fluxcli +.PHONY: $(LOCALBIN) +$(LOCALBIN): + mkdir -p $(LOCALBIN) + .PHONY: fluxcli fluxcli: docker build -f build/scheduler/Dockerfile --build-arg ARCH="amd64" --build-arg RELEASE_VERSION="$(RELEASE_VERSION)" -t $(LOCAL_REGISTRY)/$(LOCAL_IMAGE) . 
@@ -22,6 +27,12 @@ fluxcli: server: $(COMMONENVVAR) $(BUILDENVVAR) go build -ldflags '-w' -o bin/server cmd/main.go +.PHONY: protoc +protoc: $(LOCALBIN) + GOBIN=$(LOCALBIN) go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.28 + GOBIN=$(LOCALBIN) go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.2 + +# You can use make protoc to download proto .PHONY: proto -proto: - protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluence/fluxcli-grpc/fluxcli.proto +proto: protoc + PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluence/fluxcli-grpc/fluxcli.proto diff --git a/src/build/scheduler/Dockerfile b/src/build/scheduler/Dockerfile index 18c4bd7..15a9678 100644 --- a/src/build/scheduler/Dockerfile +++ b/src/build/scheduler/Dockerfile @@ -132,6 +132,7 @@ WORKDIR /go/src/fluence/ # This is the 0.31.0 tag of flux-sched (same as we install above) RUN go get -u github.com/flux-framework/flux-sched/resource/reapi/bindings/go/src/fluxcli@250eac78a6753253fc8353a3504d7e843d1b6b24 && \ go mod tidy && \ + go mod vendor && \ make server FLUX_SCHED_ROOT=/home/flux-sched INSTALL_PREFIX=${INSTALL_PREFIX} && \ mkdir -p /home/data/jobspecs /home/data/jgf && \ chmod -R ugo+rwx /home/data \ No newline at end of file diff --git a/src/fluence/fluxcli-grpc/fluxcli.pb.go b/src/fluence/fluxcli-grpc/fluxcli.pb.go index e317af2..6bd47d4 100644 --- a/src/fluence/fluxcli-grpc/fluxcli.pb.go +++ b/src/fluence/fluxcli-grpc/fluxcli.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.26.0 -// protoc v3.15.8 +// protoc-gen-go v1.28.1 +// protoc v3.20.3 // source: fluence/fluxcli-grpc/fluxcli.proto package fluxcli diff --git a/src/fluence/fluxcli-grpc/fluxcli.proto b/src/fluence/fluxcli-grpc/fluxcli.proto index f47d35b..f85b558 100644 --- a/src/fluence/fluxcli-grpc/fluxcli.proto +++ b/src/fluence/fluxcli-grpc/fluxcli.proto @@ -73,4 +73,4 @@ message JGFRequest { // The JGF response message message JGFResponse { string jgf = 1; -} +} \ No newline at end of file diff --git a/src/fluence/fluxcli-grpc/fluxcli_grpc.pb.go b/src/fluence/fluxcli-grpc/fluxcli_grpc.pb.go index 7bd905a..f984b04 100644 --- a/src/fluence/fluxcli-grpc/fluxcli_grpc.pb.go +++ b/src/fluence/fluxcli-grpc/fluxcli_grpc.pb.go @@ -1,4 +1,8 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
+// versions: +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 +// source: fluence/fluxcli-grpc/fluxcli.proto package fluxcli diff --git a/src/fluence/fluxion/fluxion.go b/src/fluence/fluxion/fluxion.go index 7ef532a..6478602 100644 --- a/src/fluence/fluxion/fluxion.go +++ b/src/fluence/fluxion/fluxion.go @@ -18,6 +18,7 @@ type Fluxion struct { pb.UnimplementedFluxcliServiceServer } +// InitFluxion creates a new client to interaction with the fluxion API (via go bindings) func (f *Fluxion) InitFluxion(policy *string, label *string) { f.cli = fluxcli.NewReapiClient() @@ -44,7 +45,9 @@ func (f *Fluxion) InitFluxion(policy *string, label *string) { f.cli.InitContext(string(jgf), p) } +// Cancel wraps the Cancel function of the fluxion go bindings func (s *Fluxion) Cancel(ctx context.Context, in *pb.CancelRequest) (*pb.CancelResponse, error) { + fmt.Printf("[GRPCServer] Received Cancel request %v\n", in) err := s.cli.Cancel(int64(in.JobID), true) if err != nil { @@ -55,7 +58,6 @@ func (s *Fluxion) Cancel(ctx context.Context, in *pb.CancelRequest) (*pb.CancelR // This (I think) should be an error code for the specific job dr := &pb.CancelResponse{JobID: in.JobID} fmt.Printf("[GRPCServer] Sending Cancel response %v\n", dr) - fmt.Printf("[CancelRPC] Errors so far: %s\n", s.cli.GetErrMsg()) reserved, at, overhead, mode, fluxerr := s.cli.Info(int64(in.JobID)) @@ -66,30 +68,62 @@ func (s *Fluxion) Cancel(ctx context.Context, in *pb.CancelRequest) (*pb.CancelR return dr, nil } -func (s *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResponse, error) { - filename := "/home/data/jobspecs/jobspec.yaml" - jobspec.CreateJobSpecYaml(in.Ps, in.Count, filename) +// generateJobSpec generates a jobspec for a match request and returns the string +func (s *Fluxion) generateJobspec(in *pb.MatchRequest) ([]byte, error) { - spec, err := os.ReadFile(filename) + spec := []byte{} + + // Create a temporary file to write and read the jobspec + // The first parameter here as the empty string creates in /tmp + file, err := os.CreateTemp("", "jobspec.*.yaml") if err != nil { - return nil, errors.New("Error reading jobspec") + return spec, err } + defer os.Remove(file.Name()) + jobspec.CreateJobSpecYaml(in.Ps, in.Count, file.Name()) + spec, err = os.ReadFile(file.Name()) + if err != nil { + return spec, errors.New("Error reading jobspec") + } + return spec, err +} + +// Match wraps the MatchAllocate function of the fluxion go bindings +// If a match is not possible, we return the error and an empty response +func (s *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResponse, error) { + + emptyResponse := &pb.MatchResponse{} + + // Prepare an empty match response (that can still be serialized) fmt.Printf("[GRPCServer] Received Match request %v\n", in) + + // Generate the jobspec, written to temporary file and read as string + spec, err := s.generateJobspec(in) + if err != nil { + return emptyResponse, err + } + + // Ask flux to match allocate! 
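+	// MatchAllocate returns (reserved, allocated, at, overhead, jobid, err),
+	// where "allocated" is the match result (a JSON graph) serialized as a string.
+	// An empty "allocated" string is treated below as "no allocation possible".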
reserved, allocated, at, overhead, jobid, fluxerr := s.cli.MatchAllocate(false, string(spec)) utils.PrintOutput(reserved, allocated, at, overhead, jobid, fluxerr) - fmt.Printf("[MatchRPC] Errors so far: %s\n", s.cli.GetErrMsg()) if fluxerr != nil { - return nil, errors.New("Error in ReapiCliMatchAllocate") + fmt.Printf("[GRPCServer] Flux err is %w\n", fluxerr) + return emptyResponse, errors.New("Error in ReapiCliMatchAllocate") } + // This usually means we cannot allocate + // We need to return an error here otherwise we try to pass an empty string + // to other RPC endpoints and get back an error. if allocated == "" { - return nil, nil + fmt.Println("[GRPCServer] Allocated is empty") + return emptyResponse, errors.New("allocation was not possible") } - nodetasks := utils.ParseAllocResult(allocated) - + // Pass the spec name in so we can include it in the allocation result + // This will allow us to inspect the ordering later. + nodetasks := utils.ParseAllocResult(allocated, in.Ps.Container) nodetaskslist := make([]*pb.NodeAlloc, len(nodetasks)) for i, result := range nodetasks { nodetaskslist[i] = &pb.NodeAlloc{ diff --git a/src/fluence/go.mod b/src/fluence/go.mod index 5a14548..5409a2a 100644 --- a/src/fluence/go.mod +++ b/src/fluence/go.mod @@ -1,6 +1,6 @@ module github.com/flux-framework/flux-k8s/flux-plugin/fluence -go 1.16 +go 1.19 require ( github.com/flux-framework/flux-sched/resource/reapi/bindings/go v0.0.0-20231213021445-250eac78a675 @@ -13,6 +13,33 @@ require ( k8s.io/kubectl v0.0.0 ) +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-logr/logr v0.4.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.2 // indirect + github.com/google/go-cmp v0.5.5 // indirect + github.com/google/gofuzz v1.1.0 // indirect + github.com/googleapis/gnostic v0.5.5 // indirect + github.com/json-iterator/go v1.1.11 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.1 // indirect + golang.org/x/net v0.0.0-20210520170846-37e1c6afe023 // indirect + golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d // indirect + golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 // indirect + golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d // indirect + golang.org/x/text v0.3.6 // indirect + golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect + google.golang.org/appengine v1.6.5 // indirect + google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect + k8s.io/klog/v2 v2.9.0 // indirect + k8s.io/utils v0.0.0-20210819203725-bdf08cb9a70a // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.1.2 // indirect + sigs.k8s.io/yaml v1.2.0 // indirect +) + replace ( k8s.io/api => k8s.io/api v0.22.3 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.22.3 diff --git a/src/fluence/jgf/jgf.go b/src/fluence/jgf/jgf.go index d12148b..1f45235 100644 --- a/src/fluence/jgf/jgf.go +++ b/src/fluence/jgf/jgf.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -23,51 +23,22 @@ import ( "strings" ) -type node struct { - Id string `json:"id"` - Label string `json:"label,omitempty"` - Metadata nodeMetadata `json:"metadata,omitempty"` -} +var ( + // Defaults for nodes + defaultExclusive = false + defaultRank = -1 + defaultSize = 1 + defaultUnit = "" -type edge struct { - Source string `json:"source"` - Relation string `json:"relation,omitempty"` - Target string `json:"target"` - Directed bool `json:"directed,omitempty"` - Metadata edgeMetadata `json:"metadata"` -} + // Relations + containsRelation = "contains" + inRelation = "in" -type edgeMetadata struct { - Name map[string]string `json:"name,omitempty"` -} - -type nodeMetadata struct { - Type string `json:"type"` - Basename string `json:"basename"` - Name string `json:"name"` - Id int `json:"id"` - Uniq_id int `json:"uniq_id"` - Rank int `json:"rank,omitempty"` - Exclusive bool `json:"exclusive"` - Unit string `json:"unit"` - Size int `json:"size"` - Paths map[string]string `json:"paths,omitempty"` - Properties map[string]string `json:"properties,omitempty"` -} - -type graph struct { - Nodes []node `json:"nodes"` - Edges []edge `json:"edges"` - // Metadata metadata `json:"metadata,omitempty"` - Directed bool `json:"directed,omitempty"` -} - -type Fluxjgf struct { - Graph graph `json:"graph"` - Elements int `json:"-"` - NodeMap map[string]node `json:"-"` -} + // Paths + containmentKey = "containment" +) +// InitJGF initializes the Flux Json Graph Format object func InitJGF() (fluxgraph Fluxjgf) { var g graph fluxgraph = Fluxjgf{ @@ -77,42 +48,49 @@ func InitJGF() (fluxgraph Fluxjgf) { } return } + +// getDefaultPaths returns a new map with empty containment +// this cannot be a global shared variable or we get an error +// about inserting an edge to itself. 
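+// In other words, each node needs its own fresh map so that MakeEdge can
+// update one node's containment path without also mutating every other node's.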
+func getDefaultPaths() map[string]string { + return map[string]string{containmentKey: ""} +} + +// addNode adds a node to the JGF func (g *Fluxjgf) addNode(toadd node) { g.Graph.Nodes = append(g.Graph.Nodes, toadd) g.NodeMap[toadd.Id] = toadd g.Elements = g.Elements + 1 } +// MakeEdge creates an edge for the JGF func (g *Fluxjgf) MakeEdge(source string, target string, contains string) { newedge := edge{ Source: source, Target: target, Metadata: edgeMetadata{ - Name: map[string]string{ - "containment": contains, - }, + Name: map[string]string{containmentKey: contains}, }, } g.Graph.Edges = append(g.Graph.Edges, newedge) - if contains == "contains" { + if contains == containsRelation { tnode := g.NodeMap[target] - tnode.Metadata.Paths["containment"] = g.NodeMap[source].Metadata.Paths["containment"] + "/" + tnode.Metadata.Name + tnode.Metadata.Paths[containmentKey] = g.NodeMap[source].Metadata.Paths[containmentKey] + "/" + tnode.Metadata.Name } - } +// processLabels selects a subset based on a string filter func processLabels(labels *map[string]string, filter string) (filtered map[string]string) { - filtered = make(map[string]string, 0) + filtered = map[string]string{} for key, v := range *labels { if strings.Contains(key, filter) { - filtered[key] = v } } return } - +// MakeSubnet creates a subnet for the graph func (g *Fluxjgf) MakeSubnet(index int, ip string) string { newnode := node{ Id: strconv.Itoa(g.Elements), @@ -122,19 +100,18 @@ func (g *Fluxjgf) MakeSubnet(index int, ip string) string { Name: ip + strconv.Itoa(g.Elements), Id: index, Uniq_id: g.Elements, - Rank: -1, - Exclusive: false, - Unit: "", - Size: 1, - Paths: map[string]string{ - "containment": "", - }, + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: defaultUnit, + Size: defaultSize, + Paths: getDefaultPaths(), }, } g.addNode(newnode) return newnode.Id } +// MakeNode creates a new node for the graph func (g *Fluxjgf) MakeNode(index int, exclusive bool, subnet string) string { newnode := node{ Id: strconv.Itoa(g.Elements), @@ -144,19 +121,18 @@ func (g *Fluxjgf) MakeNode(index int, exclusive bool, subnet string) string { Name: subnet + strconv.Itoa(g.Elements), Id: g.Elements, Uniq_id: g.Elements, - Rank: -1, + Rank: defaultRank, Exclusive: exclusive, - Unit: "", - Size: 1, - Paths: map[string]string{ - "containment": "", - }, + Unit: defaultUnit, + Size: defaultSize, + Paths: getDefaultPaths(), }, } g.addNode(newnode) return newnode.Id } +// MakeSocket creates a socket for the graph func (g *Fluxjgf) MakeSocket(index int, name string) string { newnode := node{ Id: strconv.Itoa(g.Elements), @@ -166,19 +142,18 @@ func (g *Fluxjgf) MakeSocket(index int, name string) string { Name: name + strconv.Itoa(index), Id: index, Uniq_id: g.Elements, - Rank: -1, - Exclusive: false, - Unit: "", - Size: 1, - Paths: map[string]string{ - "containment": "", - }, + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: defaultUnit, + Size: defaultSize, + Paths: getDefaultPaths(), }, } g.addNode(newnode) return newnode.Id } +// MakeCore creates a core for the graph func (g *Fluxjgf) MakeCore(index int, name string) string { newnode := node{ Id: strconv.Itoa(g.Elements), @@ -188,19 +163,18 @@ func (g *Fluxjgf) MakeCore(index int, name string) string { Name: name + strconv.Itoa(index), Id: index, Uniq_id: g.Elements, - Rank: -1, - Exclusive: false, - Unit: "", - Size: 1, - Paths: map[string]string{ - "containment": "", - }, + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: defaultUnit, + Size: defaultSize, + Paths: 
getDefaultPaths(), }, } g.addNode(newnode) return newnode.Id } +// MakeVCore makes a vcore (I think 2 vcpu == 1 cpu) for the graph func (g *Fluxjgf) MakeVCore(coreid string, index int, name string) string { newnode := node{ Id: strconv.Itoa(g.Elements), @@ -210,21 +184,20 @@ func (g *Fluxjgf) MakeVCore(coreid string, index int, name string) string { Name: name + strconv.Itoa(index), Id: index, Uniq_id: g.Elements, - Rank: -1, - Exclusive: false, - Unit: "", - Size: 1, - Paths: map[string]string{ - "containment": "", - }, + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: defaultUnit, + Size: defaultSize, + Paths: getDefaultPaths(), }, } g.addNode(newnode) - g.MakeEdge(coreid, newnode.Id, "contains") - g.MakeEdge(newnode.Id, coreid, "in") + g.MakeEdge(coreid, newnode.Id, containsRelation) + g.MakeEdge(newnode.Id, coreid, inRelation) return newnode.Id } +// MakeNFProperties makes the node feature discovery properties for the graph func (g *Fluxjgf) MakeNFDProperties(coreid string, index int, filter string, labels *map[string]string) { for key, _ := range *labels { if strings.Contains(key, filter) { @@ -241,17 +214,15 @@ func (g *Fluxjgf) MakeNFDProperties(coreid string, index int, filter string, lab Name: name + strconv.Itoa(index), Id: index, Uniq_id: g.Elements, - Rank: -1, - Exclusive: false, - Unit: "", - Size: 1, - Paths: map[string]string{ - "containment": "", - }, + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: defaultUnit, + Size: defaultSize, + Paths: getDefaultPaths(), }, } g.addNode(newnode) - g.MakeEdge(coreid, newnode.Id, "contains") + g.MakeEdge(coreid, newnode.Id, containsRelation) } } } @@ -269,21 +240,20 @@ func (g *Fluxjgf) MakeNFDPropertiesByValue(coreid string, index int, filter stri Name: name + strconv.Itoa(index), Id: index, Uniq_id: g.Elements, - Rank: -1, - Exclusive: false, - Unit: "", - Size: 1, - Paths: map[string]string{ - "containment": "", - }, + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: defaultUnit, + Size: defaultSize, + Paths: getDefaultPaths(), }, } g.addNode(newnode) - g.MakeEdge(coreid, newnode.Id, "contains") + g.MakeEdge(coreid, newnode.Id, containsRelation) } } } +// MakeMemory creates memory for the graph func (g *Fluxjgf) MakeMemory(index int, name string, unit string, size int) string { newnode := node{ Id: strconv.Itoa(g.Elements), @@ -293,19 +263,18 @@ func (g *Fluxjgf) MakeMemory(index int, name string, unit string, size int) stri Name: name + strconv.Itoa(index), Id: index, Uniq_id: g.Elements, - Rank: -1, - Exclusive: false, + Rank: defaultRank, + Exclusive: defaultExclusive, Unit: unit, Size: size, - Paths: map[string]string{ - "containment": "", - }, + Paths: getDefaultPaths(), }, } g.addNode(newnode) return newnode.Id } +// MakeGPU makes a gpu for the graph func (g *Fluxjgf) MakeGPU(index int, name string, size int) string { newnode := node{ Id: strconv.Itoa(g.Elements), @@ -315,19 +284,18 @@ func (g *Fluxjgf) MakeGPU(index int, name string, size int) string { Name: name + strconv.Itoa(index), Id: index, Uniq_id: g.Elements, - Rank: -1, - Exclusive: false, - Unit: "", + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: defaultUnit, Size: size, - Paths: map[string]string{ - "containment": "", - }, + Paths: getDefaultPaths(), }, } g.addNode(newnode) return newnode.Id } +// MakeCluster creates the cluster func (g *Fluxjgf) MakeCluster(clustername string) string { g.Elements = 0 newnode := node{ @@ -338,12 +306,12 @@ func (g *Fluxjgf) MakeCluster(clustername string) string { Name: clustername + "0", 
Id: g.Elements, Uniq_id: 0, - Rank: -1, - Exclusive: false, - Unit: "", - Size: 1, + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: defaultUnit, + Size: defaultSize, Paths: map[string]string{ - "containment": "/" + clustername + "0", + containmentKey: "/" + clustername + "0", }, }, } @@ -351,6 +319,7 @@ func (g *Fluxjgf) MakeCluster(clustername string) string { return newnode.Id } +// MakeRack makes the rack func (g *Fluxjgf) MakeRack(id int) string { newnode := node{ Id: strconv.Itoa(g.Elements), @@ -360,13 +329,11 @@ func (g *Fluxjgf) MakeRack(id int) string { Name: "rack" + strconv.Itoa(id), Id: id, Uniq_id: g.Elements, - Rank: -1, - Exclusive: false, - Unit: "", - Size: 1, - Paths: map[string]string{ - "containment": "", - }, + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: defaultUnit, + Size: defaultSize, + Paths: getDefaultPaths(), }, } g.addNode(newnode) diff --git a/src/fluence/jgf/types.go b/src/fluence/jgf/types.go new file mode 100644 index 0000000..b2b743f --- /dev/null +++ b/src/fluence/jgf/types.go @@ -0,0 +1,62 @@ +/* +Copyright © 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package jgf + +type node struct { + Id string `json:"id"` + Label string `json:"label,omitempty"` + Metadata nodeMetadata `json:"metadata,omitempty"` +} + +type edge struct { + Source string `json:"source"` + Relation string `json:"relation,omitempty"` + Target string `json:"target"` + Directed bool `json:"directed,omitempty"` + Metadata edgeMetadata `json:"metadata"` +} + +type edgeMetadata struct { + Name map[string]string `json:"name,omitempty"` +} + +type nodeMetadata struct { + Type string `json:"type"` + Basename string `json:"basename"` + Name string `json:"name"` + Id int `json:"id"` + Uniq_id int `json:"uniq_id"` + Rank int `json:"rank,omitempty"` + Exclusive bool `json:"exclusive"` + Unit string `json:"unit"` + Size int `json:"size"` + Paths map[string]string `json:"paths,omitempty"` + Properties map[string]string `json:"properties,omitempty"` +} + +type graph struct { + Nodes []node `json:"nodes"` + Edges []edge `json:"edges"` + // Metadata metadata `json:"metadata,omitempty"` + Directed bool `json:"directed,omitempty"` +} + +type Fluxjgf struct { + Graph graph `json:"graph"` + Elements int `json:"-"` + NodeMap map[string]node `json:"-"` +} diff --git a/src/fluence/jobspec/jobspec.go b/src/fluence/jobspec/jobspec.go index 8ef90ae..683f586 100644 --- a/src/fluence/jobspec/jobspec.go +++ b/src/fluence/jobspec/jobspec.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -25,9 +25,9 @@ import ( "gopkg.in/yaml.v2" ) - - /* + +Structure of the PodSpec that needs to be generated, for reference Ps: &pb.PodSpec{ Id: pod_jobspec.ID, Container: pod_jobspec.Containers[0].Image, @@ -38,149 +38,123 @@ Ps: &pb.PodSpec{ }, */ -func CreateJobSpecYaml(pr *pb.PodSpec, count int32, filename string) error { - socket_resources := make([]Resource, 1) - command := []string{pr.Container} - socket_resources[0] = Resource{Type: "core", Count: int64(pr.Cpu)} - if pr.Memory > 0 { - toMB := pr.Memory >> 20 - socket_resources = append(socket_resources, Resource{Type: "memory", Count: toMB}) - } +// CreateJobSpecYaml writes the protobuf jobspec into a yaml file +func CreateJobSpecYaml(spec *pb.PodSpec, count int32, filename string) error { - if pr.Gpu > 0 { - socket_resources = append(socket_resources, Resource{Type: "gpu", Count: pr.Gpu}) - } + command := []string{spec.Container} + fmt.Println("Labels ", spec.Labels, " ", len(spec.Labels)) - fmt.Println("Labels ", pr.Labels, " ", len(pr.Labels)) + js := JobSpec{ + Version: Version{Version: 9999}, + Attributes: Attribute{System{Duration: 3600}}, - js := JobSpec{ - Version: Version{ - Version: 9999, + // The name of the task likely needs to correspond with the pod + // Since we can't easily change the proto file, for now it is + // storing the pod namespaced name. + Tasks: []Task{ + { + Command: command, + Slot: "default", + Counts: Count{PerSlot: 1}, }, - Attributes: Attribute{ - System{ - Duration: 3600, - }, - }, - Tasks: []Task{ - { - // Command: "[\""+command+"\"]", - Command: command, - Slot: "default", - Counts: Count{ - PerSlot: 1, - }, - }, - }, - } + }, + } + + // Assemble resources! 
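+	// The assembled jobspec ends up with version 9999, a one hour duration,
+	// one task bound to the "default" slot (its command carries the pod
+	// identifier stored in PodSpec.Container), and a resource tree of
+	// slot (count = the requested count, i.e. the group size) -> core/memory/gpu,
+	// optionally nested under subnet -> node when a "zone" label is present.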
+ socketResources := createSocketResources(spec) + js.Version.Resources = createResources(spec, socketResources, count) + + // Write bytes to file + yamlbytes, err := yaml.Marshal(&js) + if err != nil { + log.Fatalf("[JobSpec] yaml.Marshal failed with '%s'\n", err) + return err + } + return writeBytes(yamlbytes, filename) +} + +// WriteBytes writes a byte string to file +func writeBytes(bytelist []byte, filename string) error { + fmt.Printf("[JobSpec] Preparing to write:\n%s\n", string(bytelist)) + f, err := os.Create(filename) + if err != nil { + log.Fatalf("[JobSpec] Couldn't create file!!\n") + return err + } + defer f.Close() + + _, err = f.Write(bytelist) + if err != nil { + log.Fatalf("[JobSpec] Couldn't write file!!\n") + return err + } + + // Not sure why this is here, but will keep for now + _, err = f.WriteString("\n") + if err != nil { + log.Fatalf("[JobSpec] Couldn't append newline to file!!\n") + } + return err +} + +func toGB(bytes int64) int64 { + res := float64(bytes) / math.Pow(10, 9) + return int64(res) +} + +// createSocketResources creates the socket resources for the JobSpec +func createSocketResources(spec *pb.PodSpec) []Resource { - slot_resource := make([]Resource, 1) - slot_resource[0] = Resource{ - Type: "slot", + socketResources := []Resource{ + { + Type: "core", Count: int64(spec.Cpu), + }, + } + + // TODO double check what we are converting from -> to + if spec.Memory > 0 { + toMB := spec.Memory >> 20 + socketResources = append(socketResources, Resource{Type: "memory", Count: toMB}) + } + + if spec.Gpu > 0 { + socketResources = append(socketResources, Resource{Type: "gpu", Count: spec.Gpu}) + } + return socketResources +} + +// createResources assembles the list of JobSpec resources +func createResources(spec *pb.PodSpec, socketResources []Resource, count int32) []Resource { + + slotResource := []Resource{ + { + Type: "slot", Count: int64(count), Label: "default", - With: socket_resources, - } - - if len(pr.Labels) > 0 { - for _, label := range pr.Labels { - if label == "zone" { - node_resource := make([]Resource, 1) - node_resource[0] = Resource{ - Type: "subnet", + With: socketResources, + }, + } + + // Presence of the zone label means we need to add a subnet + if len(spec.Labels) > 0 { + for _, label := range spec.Labels { + if label == "zone" { + nodeResource := []Resource{ + { + Type: "subnet", Count: 1, With: []Resource{ { - Type: "node", + Type: "node", Count: 1, - With: slot_resource, /*[]Resource{ - { - Type: "socket", - Count: 1, - With: slot_resource, - }, - },*/ + With: slotResource, }, }, - } - js.Version.Resources = node_resource + }, } - + return nodeResource } - - } else { - fmt.Println("No labels, going with plain JobSpec") - js.Version.Resources = slot_resource - } - - // js := JobSpec{ - // Version: Version{ - // Version: 9999, - // Resources: []Resource{ - // { - // Type: "node", - // Count: 1, - // With: []Resource{ - // { - // Type: "socket", - // Count: 1, - // With: []Resource{ - // { - // Type: "slot", - // Count: int64(count), - // Label: "default", - // With: socket_resources, - // }, - // }, - // }, - // }, - // }, - // }, - // }, - // Attributes: Attribute{ - // System{ - // Duration: 3600, - // }, - // }, - // Tasks: []Task{ - // { - // // Command: "[\""+command+"\"]", - // Command: command, - // Slot: "default", - // Counts: Count{ - // PerSlot: 1, - // }, - // }, - // }, - // } - yamlbytes, err := yaml.Marshal(&js) - if err != nil { - log.Fatalf("[JobSpec] yaml.Marshal failed with '%s'\n", err) - return err - } - 
fmt.Printf("[JobSpec] JobSpec in YAML:\n%s\n", string(yamlbytes)) - f, err := os.Create(filename) - if err != nil { - log.Fatalf("[JobSpec] Couldn't create yaml file!!\n") - return err } - defer f.Close() - - _, err = f.Write(yamlbytes) - if err != nil { - log.Fatalf("[JobSpec] Couldn't write yaml file!!\n") - return err - } - - _, err = f.WriteString("\n") - if err != nil { - log.Fatalf("[JobSpec] Couldn't write yaml file!!\n") - return err - } - return nil -} - -func toGB(bytes int64) int64 { - res := float64(bytes) / math.Pow(10, 9) - return int64(res) + } + return slotResource } diff --git a/src/fluence/jobspec/types.go b/src/fluence/jobspec/types.go index 9f4e4fc..8d6d06f 100644 --- a/src/fluence/jobspec/types.go +++ b/src/fluence/jobspec/types.go @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + package jobspec type Version struct { diff --git a/src/fluence/utils/utils.go b/src/fluence/utils/utils.go index 2d6d932..aadcb41 100644 --- a/src/fluence/utils/utils.go +++ b/src/fluence/utils/utils.go @@ -4,7 +4,6 @@ import ( "context" "fmt" - // "strings" "encoding/json" "github.com/flux-framework/flux-k8s/flux-plugin/fluence/jgf" @@ -17,7 +16,8 @@ import ( resourcehelper "k8s.io/kubectl/pkg/util/resource" ) -func CreateJGF(filename string, label *string) error { +// CreateJGF creates the Json Graph Format +func CreateJGF(filename string, skipLabel *string) error { ctx := context.Background() config, err := rest.InClusterConfig() if err != nil { @@ -34,6 +34,9 @@ func CreateJGF(filename string, label *string) error { var fluxgraph jgf.Fluxjgf fluxgraph = jgf.InitJGF() + + // TODO it looks like we can add more to the graph here - + // let's remember to consider what else we can. // subnets := make(map[string]string) cluster := fluxgraph.MakeCluster("k8scluster") @@ -49,10 +52,14 @@ func CreateJGF(filename string, label *string) error { var totalAllocCpu, totalmem int64 totalAllocCpu = 0 sdnCount := 0 + for node_index, node := range nodes.Items { + + // Question from V: what was this for (what is a worker)? // _, worker := node.Labels["node-role.kubernetes.io/worker"] - if *label != "" { - _, fluxnode := node.Labels[*label] + + if *skipLabel != "" { + _, fluxnode := node.Labels[*skipLabel] if !fluxnode { fmt.Println("Skipping node ", node.GetName()) continue @@ -71,6 +78,7 @@ func CreateJGF(filename string, label *string) error { if err != nil { return err } + // fmt.Println("Node ", node.GetName(), " has pods ", pods) // Check if subnet already exists // Here we build subnets according to topology.kubernetes.io/zone label @@ -128,6 +136,9 @@ func CreateJGF(filename string, label *string) error { core := fluxgraph.MakeCore(index, "core") fluxgraph.MakeEdge(workernode, core, "contains") // workernode was socket fluxgraph.MakeEdge(core, workernode, "in") + + // Question from Vanessa: + // How can we get here and have vcores ever not equal to zero? 
if vcores == 0 { fluxgraph.MakeNFDProperties(core, index, "cpu-", &node.Labels) // fluxgraph.MakeNFDProperties(core, index, "netmark-", &node.Labels) @@ -190,30 +201,33 @@ type allocation struct { CoreCount int } -func ParseAllocResult(allocated string) []allocation { +// ParseAllocResult takes an allocated (string) and parses into a list of allocation +// We include the pod namespace/name for debugging later +func ParseAllocResult(allocated, podName string) []allocation { var dat map[string]interface{} - result := make([]allocation, 0) + result := []allocation{} + + // Keep track of total core count across allocated corecount := 0 + + // This should not happen - the string we get back should parse. if err := json.Unmarshal([]byte(allocated), &dat); err != nil { panic(err) } - // fmt.Println("PRINTING DATA:\n", dat) - // graph := dat["graph"] - // fmt.Println("GET GRAPH:\n ", graph) + + // Parse graph and nodes into interfaces + // TODO look at github.com/mitchellh/mapstructure + // that might make this easier nodes := dat["graph"].(interface{}) str1 := nodes.(map[string]interface{}) - // fmt.Println("GET NODES:\n", str1["nodes"]) str2 := str1["nodes"].([]interface{}) - // fmt.Println("NODES:\n", len(str2)) + for _, item := range str2 { - // fmt.Println("ITEM: ", item) str1 = item.(map[string]interface{}) metadata := str1["metadata"].(map[string]interface{}) - // fmt.Println("TYPE: ", metadata["type"]) if metadata["type"].(string) == "core" { corecount = corecount + 1 } - // fmt.Println("BASENAME: ", metadata["basename"]) if metadata["type"].(string) == "node" { result = append(result, allocation{ Type: metadata["type"].(string), @@ -221,18 +235,16 @@ func ParseAllocResult(allocated string) []allocation { Basename: metadata["basename"].(string), CoreCount: corecount, }) + + // Reset the corecount once we've added to a node corecount = 0 - // result.Type = metadata["type"].(string) - // result.Name = metadata["name"].(string) - // result.Basename = metadata["basename"].(string) - // return result } } - fmt.Println("FINAL NODE RESULT:\n", result) + fmt.Printf("Final node result for %s: %s\n", podName, result) return result } -// //// Utility functions +// Utility functions func PrintOutput(reserved bool, allocated string, at int64, overhead float64, jobid uint64, fluxerr error) { fmt.Println("\n\t----Match Allocate output---") fmt.Printf("jobid: %d\nreserved: %t\nallocated: %s\nat: %d\noverhead: %f\nerror: %w\n", jobid, reserved, allocated, at, overhead, fluxerr) From 7b9c47083cfd73b51a4c6140b1a8247bd311a11d Mon Sep 17 00:00:00 2001 From: vsoch Date: Mon, 15 Jan 2024 07:31:38 -0700 Subject: [PATCH 04/28] fix: use podgroup millisecond precision timestamp Problem: the podgroups with second precision have interleaving Solution: try to create an internal representation with better precision. This looks promising with early testing, but I need to consider the edge cases and how to clean up the groups, otherwise a pod group might be re-created later and still in the cache. 
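
A rough sketch of the idea (illustrative only; it assumes metav1 is
k8s.io/apimachinery/pkg/apis/meta/v1, which the patch uses below): each group
records metav1.NowMicro() when it is first seen, and sorting compares those
microsecond timestamps before falling back to namespace/name.

    created1 := metav1.NowMicro()
    created2 := metav1.NowMicro()
    if created1.Equal(&created2) {
        // same microsecond: fall back to namespace/name ordering
    } else if created1.Before(&created2) {
        // the first group sorts (and therefore schedules) first
    }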
Signed-off-by: vsoch --- examples/indexed-jobs/job1.yaml | 4 +- .../pkg/fluence/core/core.go | 173 ++++++++++++++---- sig-scheduler-plugins/pkg/fluence/fluence.go | 167 ++++++++--------- sig-scheduler-plugins/pkg/fluence/group.go | 101 ++++++++++ 4 files changed, 324 insertions(+), 121 deletions(-) create mode 100644 sig-scheduler-plugins/pkg/fluence/group.go diff --git a/examples/indexed-jobs/job1.yaml b/examples/indexed-jobs/job1.yaml index 5778bc3..609e843 100644 --- a/examples/indexed-jobs/job1.yaml +++ b/examples/indexed-jobs/job1.yaml @@ -3,8 +3,8 @@ kind: Job metadata: name: job-1 spec: - completions: 10 - parallelism: 10 + completions: 5 + parallelism: 5 completionMode: Indexed template: metadata: diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index 5914441..8e209ea 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -3,81 +3,190 @@ package core import ( "fmt" - "k8s.io/klog/v2" + v1 "k8s.io/api/core/v1" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubernetes/pkg/scheduler/framework" pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" ) +// FluxStateData is a CycleState +// It holds the PodCache for a pod, which has node assignment, group, and group size +// We also save the group name and size, and time created, in case we want to (somehow) resume scheduling +// In practice I'm not sure how CycleState objects are dumped and loaded. Kueue has a dumper :P +// https://github.com/kubernetes/enhancements/blob/master/keps/sig-scheduling/624-scheduling-framework/README.md#cyclestate type FluxStateData struct { - NodeName string + NodeCache NodeCache } +// Clone is required for CycleState plugins func (s *FluxStateData) Clone() framework.StateData { - clone := &FluxStateData{ - NodeName: s.NodeName, + return &FluxStateData{NodeCache: s.NodeCache} +} + +// NewFluxState creates an entry for the CycleState with the minimum that we might need +func NewFluxState(nodeName string, groupName string, size int32) *FluxStateData { + cache := NodeCache{ + NodeName: nodeName, + GroupName: groupName, + MinGroupSize: size, } - return clone + return &FluxStateData{NodeCache: cache} } -type NodePodsCount struct { +// NodeCache holds the node name and tasks for the node +// For the PodGroupCache, these are organized by group name, +// and there is a list of them +type NodeCache struct { NodeName string - Count int + + // This is derived from tasks, where + // task is an allocation to some node + // High level it is most often akin to the + // number of pods on the node. I'm not sure that I understand this + // https://github.com/flux-framework/flux-k8s/blob/9f24f36752e3cced1b1112d93bfa366fb58b3c84/src/fluence/fluxion/fluxion.go#L94-L97 + // How does that relate to a single pod? It is called "Count" in other places + Tasks int + + // These fields are primarily for the FluxStateData + // Without a PodGroup CRD we keep min size here + MinGroupSize int32 + GroupName string } -var podgroupMap map[string][]NodePodsCount +// A pod group cache holds a list of nodes for an allocation, where each has some number of tasks +// along with the expected group size. This is intended to replace PodGroup +// given the group name, size (derived from annotations) and timestamp +type PodGroupCache struct { + + // This is a cache of nodes for pods + Nodes []NodeCache + Size int32 + Name string + // Keep track of when the group was initially created! 
+ // This is like, the main thing we need. + TimeCreated metav1.MicroTime +} + +// Memory cache of pod group name to pod group cache, above +var podGroupCache map[string]*PodGroupCache + +// Init populates the podGroupCache func Init() { - podgroupMap = make(map[string][]NodePodsCount, 0) + podGroupCache = map[string]*PodGroupCache{} } -func (n *NodePodsCount) Clone() framework.StateData { - return &NodePodsCount{ - NodeName: n.NodeName, - Count: n.Count, +// RegisterPodGroup ensures that the PodGroup exists in the cache +// This is an experimental replacement for an actual PodGroup +// We take a timestampo, which if called from Less (during sorting) is tiem.Time +// if called later (an individual pod) we go for its creation timestamp +func RegisterPodGroup(pod *v1.Pod, groupName string, groupSize int32) error { + entry, ok := podGroupCache[groupName] + + if !ok { + + // Assume we create the group with the timestamp + // of the first pod seen. There might be imperfections + // by the second, but as long as we sort them via millisecond + // this should prevent interleaving + nodes := []NodeCache{} + + // Create the new entry for the pod group + entry = &PodGroupCache{ + Name: groupName, + Size: groupSize, + Nodes: nodes, + TimeCreated: metav1.NowMicro(), + } + + // Tell the user when it was created + fmt.Printf("Pod group %s was created at %s\n", entry.Name, entry.TimeCreated) + } + + // If the size has changed, we currently do not allow updating it. + // We issue a warning. In the future this could be supported with a grow command. + if entry.Size != groupSize { + fmt.Printf("Pod group %s request to change size from %s to %s is not yet supported", groupName, entry.Size, groupSize) + // entry.GroupSize = groupSize } + podGroupCache[groupName] = entry + return nil +} + +// GetPodGroup gets a pod group in the cache by name +func GetPodGroup(groupName string) *PodGroupCache { + entry, _ := podGroupCache[groupName] + return entry +} + +// DeletePodGroup deletes a pod from the group cache +func DeletePodGroup(groupName string) { + delete(podGroupCache, groupName) } -func CreateNodePodsList(nodelist []*pb.NodeAlloc, pgname string) (nodepods []NodePodsCount) { - nodepods = make([]NodePodsCount, len(nodelist)) +// CreateNodePodsList creates a list of node pod caches +func CreateNodePodsList(nodelist []*pb.NodeAlloc, groupName string) (nodepods []NodeCache) { + + // Create a pod cache for each node + nodepods = make([]NodeCache, len(nodelist)) + for i, v := range nodelist { - nodepods[i] = NodePodsCount{ + nodepods[i] = NodeCache{ NodeName: v.GetNodeID(), - Count: int(v.GetTasks()), + Tasks: int(v.GetTasks()), } } - podgroupMap[pgname] = nodepods - klog.Info("MAP ", podgroupMap) - return + // Update the pods in the PodGraphCache + updatePodGroupNodes(groupName, nodepods) + fmt.Printf("Pod Group Cache ", podGroupCache) + return nodepods +} + +// updatePodGroupList updates the PodGroupCache with a listing of nodes +func updatePodGroupNodes(groupName string, nodes []NodeCache) { + group := podGroupCache[groupName] + group.Nodes = nodes + podGroupCache[groupName] = group +} + +// HavePodNodes returns true if the listing of pods is not empty +// This should be all pods that are needed - the allocation will not +// be successful otherwise, so we just check > 0 +func (p *PodGroupCache) HavePodNodes() bool { + return len(p.Nodes) > 0 } -func HaveList(pgname string) bool { - _, exists := podgroupMap[pgname] - return exists +// CancelAllocation resets the node cache and allocation status +func (p 
*PodGroupCache) CancelAllocation() { + p.Nodes = []NodeCache{} } -func GetNextNode(pgname string) (string, error) { - entry, ok := podgroupMap[pgname] +// GetNextNode gets the next available node we can allocate for a group +func GetNextNode(groupName string) (string, error) { + entry, ok := podGroupCache[groupName] if !ok { err := fmt.Errorf("Map is empty") return "", err } - if len(entry) == 0 { + if len(entry.Nodes) == 0 { err := fmt.Errorf("Error while getting a node") return "", err } - nodename := entry[0].NodeName + nodename := entry.Nodes[0].NodeName - if entry[0].Count == 1 { - slice := entry[1:] + if entry.Nodes[0].Tasks == 1 { + slice := entry.Nodes[1:] if len(slice) == 0 { - delete(podgroupMap, pgname) + delete(podGroupCache, groupName) return nodename, nil } - podgroupMap[pgname] = slice + updatePodGroupNodes(groupName, slice) return nodename, nil } - entry[0].Count = entry[0].Count - 1 + entry.Nodes[0].Tasks = entry.Nodes[0].Tasks - 1 return nodename, nil } diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 32fd513..a23f4a0 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -125,9 +125,6 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { ) f.pgMgr = pgMgr - // Save the podLister to fluence to easily query for the group - f.podLister = podInformer.Lister() - // stopCh := make(chan struct{}) // defer close(stopCh) // informerFactory.Start(stopCh) @@ -148,29 +145,37 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { // 2. Compare the initialization timestamps of fluence pod groups // 3. Fall back, sort by namespace/name // See https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/ +// Less is part of Sort, which is the earliest we can see a pod unless we use gate +// IMPORTANT: Less sometimes is not called for smaller sizes, not sure why. +// To get around this we call it during PreFilter too. func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { klog.Infof("ordering pods from Coscheduling") + + // ensure we have a PodGroup no matter what + klog.Infof("ensuring fluence groups") + podGroup1 := f.ensureFluenceGroup(podInfo1.Pod) + podGroup2 := f.ensureFluenceGroup(podInfo2.Pod) + + // First preference to priority, but only if they are different prio1 := corev1helpers.PodPriority(podInfo1.Pod) prio2 := corev1helpers.PodPriority(podInfo2.Pod) + + // ...and only allow this to sort if they aren't the same + // The assumption here is that pods with priority are ignored by fluence if prio1 != prio2 { return prio1 > prio2 } - creationTime1 := f.pgMgr.GetCreationTimestamp(podInfo1.Pod, *podInfo1.InitialAttemptTimestamp) - creationTime2 := f.pgMgr.GetCreationTimestamp(podInfo2.Pod, *podInfo2.InitialAttemptTimestamp) - if creationTime1.Equal(creationTime2) { - return coschedulingcore.GetNamespacedName(podInfo1.Pod) < coschedulingcore.GetNamespacedName(podInfo2.Pod) - } - return creationTime1.Before(creationTime2) -} -// getPodGroup gets the group information from the pod group manager -// to determine if a pod is in a group. We return the group -func (f *Fluence) getPodGroup(ctx context.Context, pod *v1.Pod) (string, *sched.PodGroup) { - pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) - if pg == nil { - klog.InfoS("Not in group", "pod", klog.KObj(pod)) + // Fluence can only compare if we have two known groups. 
+ // This tries for that first, and falls back to the initial attempt timestamp + creationTime1 := f.getCreationTimestamp(podGroup1, podInfo1) + creationTime2 := f.getCreationTimestamp(podGroup2, podInfo2) + + // If they are the same, fall back to sorting by name. + if creationTime1.Equal(&creationTime2) { + return coschedulingcore.GetNamespacedName(podInfo1.Pod) < coschedulingcore.GetNamespacedName(podInfo2.Pod) } - return pgName, pg + return creationTime1.Before(&creationTime2) } // PreFilter checks info about the Pod / checks conditions that the cluster or the Pod must meet. @@ -181,47 +186,38 @@ func (f *Fluence) PreFilter( pod *v1.Pod, ) (*framework.PreFilterResult, *framework.Status) { - var ( - err error - nodename string - ) klog.Infof("Examining the pod") - // Get the pod group name and group - groupName, pg := f.getPodGroup(ctx, pod) - klog.Infof("group name is %s", groupName) + // groupName will be named according to the single pod namespace / pod if there wasn't + // a user defined group. This is a size 1 group we handle equivalently. + pg := f.getPodsGroup(pod) - // Case 1: We have a pod group - if pg != nil { + klog.Infof("The group size %d", pg.Size) + klog.Infof("group name is %s", pg.Name) - // We have not yet derived a node list - if !fcore.HaveList(groupName) { - klog.Infof("Getting a pod group") - if _, err = f.AskFlux(ctx, pod, int(pg.Spec.MinMember)); err != nil { - return nil, framework.NewStatus(framework.Unschedulable, err.Error()) - } - } - nodename, err = fcore.GetNextNode(groupName) - klog.Infof("Node Selected %s (%s:%s)", nodename, pod.Name, groupName) - if err != nil { - return nil, framework.NewStatus(framework.Unschedulable, err.Error()) - } - - } else { - - // Case 2: no group, a faux group of a lonely 1 :( - nodename, err = f.AskFlux(ctx, pod, 1) + // Note that it is always the case we have a group + // We have not yet derived a node list + if !pg.HavePodNodes() { + klog.Infof("Getting a pod group") + err := f.AskFlux(ctx, pod, int(pg.Size)) if err != nil { return nil, framework.NewStatus(framework.Unschedulable, err.Error()) } } + nodename, err := fcore.GetNextNode(pg.Name) + klog.Infof("Node Selected %s (%s:%s)", nodename, pod.Name, pg.Name) + if err != nil { + return nil, framework.NewStatus(framework.Unschedulable, err.Error()) + } // Create a fluxState (CycleState) with things that might be useful/ klog.Info("Node Selected: ", nodename) - state.Write(framework.StateKey(pod.Name), &fcore.FluxStateData{NodeName: nodename}) + cache := fcore.NodeCache{NodeName: nodename} + state.Write(framework.StateKey(pod.Name), &fcore.FluxStateData{NodeCache: cache}) return nil, framework.NewStatus(framework.Success, "") } +// TODO we need to account for affinity here func (f *Fluence) Filter( ctx context.Context, cycleState *framework.CycleState, @@ -231,10 +227,10 @@ func (f *Fluence) Filter( klog.Info("Filtering input node ", nodeInfo.Node().Name) if v, e := cycleState.Read(framework.StateKey(pod.Name)); e == nil { - if value, ok := v.(*fcore.FluxStateData); ok && value.NodeName != nodeInfo.Node().Name { + if value, ok := v.(*fcore.FluxStateData); ok && value.NodeCache.NodeName != nodeInfo.Node().Name { return framework.NewStatus(framework.Unschedulable, "pod is not permitted") } else { - klog.Info("Filter: node selected by Flux ", value.NodeName) + klog.Infof("Filter: node %s selected for %s\n", value.NodeCache.NodeName, pod.Name) } } return framework.NewStatus(framework.Success) @@ -247,7 +243,7 @@ func (f *Fluence) PreFilterExtensions() 
framework.PreFilterExtensions { } // AskFlux will ask flux for an allocation for nodes for the pod group. -func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) (string, error) { +func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { // clean up previous match if a pod has already allocated previously f.mutex.Lock() _, isPodAllocated := f.podNameToJobId[pod.Name] @@ -256,7 +252,7 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) (string, if isPodAllocated { klog.Info("Clean up previous allocation") f.mutex.Lock() - f.cancelFluxJobForPod(pod.Name) + f.cancelFluxJobForPod(pod) f.mutex.Unlock() } @@ -265,7 +261,7 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) (string, if err != nil { klog.Errorf("[FluxClient] Error connecting to server: %v", err) - return "", err + return err } defer conn.Close() @@ -278,47 +274,38 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) (string, Request: "allocate", Count: int32(count)} - r, err2 := grpcclient.Match(context.Background(), request) - if err2 != nil { - klog.Errorf("[FluxClient] did not receive any match response: %v", err2) - return "", err + // Question from vsoch; Why return err instead of err2 here? + // err would return a nil value, but we need to return non nil, + // otherwise it's going to try to use the allocation (but there is none) + r, err := grpcclient.Match(context.Background(), request) + if err != nil { + klog.Errorf("[FluxClient] did not receive any match response: %v", err) + return err } klog.Infof("[FluxClient] response podID %s", r.GetPodID()) - _, pg := f.getPodGroup(ctx, pod) + // Presence of a podGroup is indicated by a groupName + // Flag that the group is allocated (yes we also have the job id, testing for now) + pg := f.getPodsGroup(pod) - if count > 1 || pg != nil { - pgFullName, _ := f.pgMgr.GetPodGroup(ctx, pod) - nodelist := fcore.CreateNodePodsList(r.GetNodelist(), pgFullName) - klog.Infof("[FluxClient] response nodeID %s", r.GetNodelist()) - klog.Info("[FluxClient] Parsed Nodelist ", nodelist) - jobid := uint64(r.GetJobID()) + nodelist := fcore.CreateNodePodsList(r.GetNodelist(), pg.Name) + klog.Infof("[FluxClient] response nodeID %s", r.GetNodelist()) + klog.Info("[FluxClient] Parsed Nodelist ", nodelist) + jobid := uint64(r.GetJobID()) - f.mutex.Lock() - f.podNameToJobId[pod.Name] = jobid - klog.Info("Check job set: ", f.podNameToJobId) - f.mutex.Unlock() - } else { - nodename := r.GetNodelist()[0].GetNodeID() - jobid := uint64(r.GetJobID()) - - f.mutex.Lock() - f.podNameToJobId[pod.Name] = jobid - klog.Info("Check job set: ", f.podNameToJobId) - f.mutex.Unlock() - - return nodename, nil - } - - return "", nil + f.mutex.Lock() + f.podNameToJobId[pod.Name] = jobid + klog.Info("Check job set: ", f.podNameToJobId) + f.mutex.Unlock() + return nil } // cancelFluxJobForPod cancels the flux job for a pod. 
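+// It is invoked from AskFlux when a pod already holds an allocation, and from
+// the update/delete pod handlers below once a pod has succeeded, failed, or
+// been removed.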
-func (f *Fluence) cancelFluxJobForPod(podName string) error { - jobid := f.podNameToJobId[podName] +func (f *Fluence) cancelFluxJobForPod(pod *v1.Pod) error { + jobid := f.podNameToJobId[pod.Name] - klog.Infof("Cancel flux job: %v for pod %s", jobid, podName) + klog.Infof("Cancel flux job: %v for pod %s", jobid, pod.Name) start := time.Now() @@ -345,15 +332,19 @@ func (f *Fluence) cancelFluxJobForPod(podName string) error { } if res.Error == 0 { - delete(f.podNameToJobId, podName) + delete(f.podNameToJobId, pod.Name) } else { - klog.Warningf("Failed to delete pod %s from the podname-jobid map.", podName) + klog.Warningf("Failed to delete pod %s from the podname-jobid map.", pod.Name) } + // If we are successful, clear the group allocated nodes + pg := f.getPodsGroup(pod) + pg.CancelAllocation() + elapsed := metrics.SinceInSeconds(start) klog.Info("Time elapsed (Cancel Job) :", elapsed) - klog.Infof("Job cancellation for pod %s result: %d", podName, err) + klog.Infof("Job cancellation for pod %s result: %d", pod.Name, err) if klog.V(2).Enabled() { klog.Info("Check job set: after delete") klog.Info(f.podNameToJobId) @@ -380,7 +371,7 @@ func (f *Fluence) updatePod(oldObj, newObj interface{}) { defer f.mutex.Unlock() if _, ok := f.podNameToJobId[newPod.Name]; ok { - f.cancelFluxJobForPod(newPod.Name) + f.cancelFluxJobForPod(newPod) } else { klog.Infof("Succeeded pod %s/%s doesn't have flux jobid", newPod.Namespace, newPod.Name) } @@ -392,7 +383,7 @@ func (f *Fluence) updatePod(oldObj, newObj interface{}) { defer f.mutex.Unlock() if _, ok := f.podNameToJobId[newPod.Name]; ok { - f.cancelFluxJobForPod(newPod.Name) + f.cancelFluxJobForPod(newPod) } else { klog.Errorf("Failed pod %s/%s doesn't have flux jobid", newPod.Namespace, newPod.Name) } @@ -403,6 +394,8 @@ func (f *Fluence) updatePod(oldObj, newObj interface{}) { } } +// deletePod handles the delete event handler +// TODO when should we clear group from the cache? func (f *Fluence) deletePod(podObj interface{}) { klog.Info("Delete Pod event handler") @@ -417,7 +410,7 @@ func (f *Fluence) deletePod(podObj interface{}) { defer f.mutex.Unlock() if _, ok := f.podNameToJobId[pod.Name]; ok { - f.cancelFluxJobForPod(pod.Name) + f.cancelFluxJobForPod(pod) } else { klog.Infof("Terminating pod %s/%s doesn't have flux jobid", pod.Namespace, pod.Name) } @@ -426,7 +419,7 @@ func (f *Fluence) deletePod(podObj interface{}) { defer f.mutex.Unlock() if _, ok := f.podNameToJobId[pod.Name]; ok { - f.cancelFluxJobForPod(pod.Name) + f.cancelFluxJobForPod(pod) } else { klog.Infof("Deleted pod %s/%s doesn't have flux jobid", pod.Namespace, pod.Name) } diff --git a/sig-scheduler-plugins/pkg/fluence/group.go b/sig-scheduler-plugins/pkg/fluence/group.go new file mode 100644 index 0000000..a2597eb --- /dev/null +++ b/sig-scheduler-plugins/pkg/fluence/group.go @@ -0,0 +1,101 @@ +package fluence + +import ( + "fmt" + "strconv" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/scheduler/framework" + + fcore "sigs.k8s.io/scheduler-plugins/pkg/fluence/core" +) + +const ( + PodGroupNameLabel = "fluence.pod-group" + PodGroupSizeLabel = "fluence.group-size" +) + +// getDefaultGroupName returns a group name based on the pod namespace and name +// We could do this for pods that are not labeled, and treat them as a size 1 group +func (f *Fluence) getDefaultGroupName(pod *v1.Pod) string { + return fmt.Sprintf("%s-%s", pod.Namespace, pod.Name) +} + +// getPodsGroup gets the pods group, if it exists. 
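+// A group is identified by the fluence.pod-group label (with an optional
+// fluence.group-size) defined above. For example (illustrative), a pod labeled
+// fluence.pod-group=my-group and fluence.group-size="4" lands in a size-4
+// group named "my-group", while an unlabeled pod falls back to a size-1 group
+// named <namespace>-<name>.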
+func (f *Fluence) getPodsGroup(pod *v1.Pod) *fcore.PodGroupCache { + groupName := f.ensureFluenceGroup(pod) + return fcore.GetPodGroup(groupName) +} + +// ensureFluenceGroup ensure that a podGroup is created for the named fluence group +// Preference goes to the traditional PodGroup (created by the user) +// and falls back to having one created by fluence. If there is no PodGroup +// created and no fluence annotation, we do not create the group. +// Likely for fluence we'd want a cleanup function somehow too, +// for now assume groups are unique by name. +func (f *Fluence) ensureFluenceGroup(pod *v1.Pod) string { + + // Get the group name and size from the fluence labels + groupName := f.getFluenceGroupName(pod) + groupSize := f.getFluenceGroupSize(pod) + + // If there isn't a group, make a single node sized group + // This is so we can always treat the cases equally + if groupName == "" { + groupName = f.getDefaultGroupName(pod) + } + klog.Infof("group name for %s is %s", pod.Name, groupName) + klog.Infof("group size for %s is %d", pod.Name, groupSize) + + // Register the pod group (with the pod) in our cache + fcore.RegisterPodGroup(pod, groupName, groupSize) + return groupName +} + +// deleteFluenceGroup ensures the pod group is deleted, if it exists +func (f *Fluence) deleteFluenceGroup(pod *v1.Pod) { + + // Get the group name and size from the fluence labels + pg := f.getPodsGroup(pod) + fcore.DeletePodGroup(pg.Name) +} + +// getFluenceGroupName looks for the group to indicate a fluence group, and returns it +func (f *Fluence) getFluenceGroupName(pod *v1.Pod) string { + groupName, _ := pod.Labels[PodGroupNameLabel] + return groupName +} + +// getFluenceGroupSize gets the size of the fluence group +func (f *Fluence) getFluenceGroupSize(pod *v1.Pod) int32 { + size, _ := pod.Labels[PodGroupSizeLabel] + + // Default size of 1 if the label is not set (but name is) + if size == "" { + return 1 + } + + // We don't want the scheduler to fail if someone puts a value for size + // that doesn't convert nicely. They can find this in the logs. + intSize, err := strconv.ParseUint(size, 10, 32) + if err != nil { + klog.Error("Parsing integer size for pod group") + } + return int32(intSize) +} + +// getCreationTimestamp first tries the fluence group, then falls back to the initial attempt timestamp +func (f *Fluence) getCreationTimestamp(groupName string, podInfo *framework.QueuedPodInfo) metav1.MicroTime { + pg := fcore.GetPodGroup(groupName) + + // IsZero is an indicator if this was actually set + // If the group label was present and we have a group, this will be true + if !pg.TimeCreated.IsZero() { + klog.Infof("pod group %s was created at %s\n", groupName, pg.TimeCreated) + return pg.TimeCreated + } + // We should actually never get here. 
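To make the label contract above concrete, here is a minimal sketch (not part of the patch) of a plain Pod that opts into a fluence group purely through these labels; the group name, size, and image are illustrative placeholders, and `schedulerName: fluence` follows the pattern used in the lammps examples later in this series.

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: demo-worker-0                # illustrative name
  labels:
    fluence.pod-group: demo-group    # PodGroupNameLabel
    fluence.group-size: "4"          # PodGroupSizeLabel; a string, defaults to 1 when only the group name is set
spec:
  schedulerName: fluence
  containers:
    - name: app
      image: busybox                 # placeholder image
      command: ["sleep", "3600"]
```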
+ return metav1.NewMicroTime(*podInfo.InitialAttemptTimestamp) +} From d6949a0703bd05769e1e9aac1f783bd774cfa844 Mon Sep 17 00:00:00 2001 From: vsoch Date: Mon, 15 Jan 2024 19:33:29 -0700 Subject: [PATCH 05/28] logs: more for various steps to see what is going on Signed-off-by: vsoch --- .../templates/deployment.yaml | 2 +- .../charts/as-a-second-scheduler/values.yaml | 1 + .../pkg/fluence/core/core.go | 15 ++- sig-scheduler-plugins/pkg/fluence/fluence.go | 99 +++++++++---------- sig-scheduler-plugins/pkg/fluence/group.go | 10 +- src/fluence/fluxion/fluxion.go | 19 ++-- src/fluence/utils/utils.go | 17 +++- 7 files changed, 89 insertions(+), 74 deletions(-) diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml index 8a73245..ffc3ce7 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml @@ -49,7 +49,7 @@ spec: - command: - /bin/kube-scheduler - --config=/etc/kubernetes/scheduler-config.yaml - - -v=9 + - -v={{ .Values.scheduler.loggingLevel }} image: {{ .Values.scheduler.image }} imagePullPolicy: {{ .Values.scheduler.pullPolicy }} livenessProbe: diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml index 1ae99f9..38da251 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml @@ -11,6 +11,7 @@ scheduler: policy: lonode pullPolicy: Always sidecarPullPolicy: Always + loggingLevel: "9" controller: name: scheduler-plugins-controller diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index 8e209ea..ddf8e4c 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -102,13 +102,13 @@ func RegisterPodGroup(pod *v1.Pod, groupName string, groupSize int32) error { } // Tell the user when it was created - fmt.Printf("Pod group %s was created at %s\n", entry.Name, entry.TimeCreated) + fmt.Printf("[Fluence] Pod group %s was created at %s\n", entry.Name, entry.TimeCreated) } // If the size has changed, we currently do not allow updating it. // We issue a warning. In the future this could be supported with a grow command. 
if entry.Size != groupSize { - fmt.Printf("Pod group %s request to change size from %s to %s is not yet supported", groupName, entry.Size, groupSize) + fmt.Printf("[Fluence] Pod group %s request to change size from %s to %s is not yet supported\n", groupName, entry.Size, groupSize) // entry.GroupSize = groupSize } podGroupCache[groupName] = entry @@ -141,7 +141,7 @@ func CreateNodePodsList(nodelist []*pb.NodeAlloc, groupName string) (nodepods [] // Update the pods in the PodGraphCache updatePodGroupNodes(groupName, nodepods) - fmt.Printf("Pod Group Cache ", podGroupCache) + fmt.Printf("[Fluence] Pod group cache updated with nodes\n", podGroupCache) return nodepods } @@ -168,25 +168,30 @@ func (p *PodGroupCache) CancelAllocation() { func GetNextNode(groupName string) (string, error) { entry, ok := podGroupCache[groupName] if !ok { - err := fmt.Errorf("Map is empty") + err := fmt.Errorf("[Fluence] Map is empty\n") return "", err } if len(entry.Nodes) == 0 { - err := fmt.Errorf("Error while getting a node") + err := fmt.Errorf("[Fluence] Error while getting a node\n") return "", err } nodename := entry.Nodes[0].NodeName + fmt.Printf("[Fluence] Next node for group %s is %s", groupName, nodename) if entry.Nodes[0].Tasks == 1 { + fmt.Println("[Fluence] First node has one task") slice := entry.Nodes[1:] if len(slice) == 0 { + fmt.Printf("[Fluence] After this node, the slice is empty, deleting group %s from cache\n", groupName) delete(podGroupCache, groupName) return nodename, nil } + fmt.Println("[Fluence] After this node, the slide still has nodes") updatePodGroupNodes(groupName, slice) return nodename, nil } + fmt.Println("[Fluence] Subtracting one task from first node") entry.Nodes[0].Tasks = entry.Nodes[0].Tasks - 1 return nodename, nil } diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index a23f4a0..bbe6cee 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -34,9 +34,7 @@ import ( corev1helpers "k8s.io/component-helpers/scheduling/corev1" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" - "k8s.io/kubernetes/pkg/scheduler/metrics" - corelisters "k8s.io/client-go/listers/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" sched "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" coschedulingcore "sigs.k8s.io/scheduler-plugins/pkg/coscheduling/core" @@ -51,9 +49,6 @@ type Fluence struct { client client.Client podNameToJobId map[string]uint64 pgMgr coschedulingcore.Manager - - // The pod group manager has a lister, but it's private - podLister corelisters.PodLister } // Name is the name of the plugin used in the Registry and configurations. 
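As an aside, the contract implied by `CreateNodePodsList` and `GetNextNode` can be sketched as below. This is illustration only, not patch code: it assumes it is compiled inside the scheduler-plugins tree where these packages resolve, the group and pod names are made up, and the empty node list simply exercises the error path rather than a real Fluxion allocation.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	fcore "sigs.k8s.io/scheduler-plugins/pkg/fluence/core"
	pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc"
)

func main() {
	// The plugin initializes the group cache once in New(); do the same here.
	fcore.Init()

	// In the plugin the group is registered from the pod labels (ensureFluenceGroup);
	// here we register a toy group of size 2 directly.
	groupName := "example-group"
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "example-0", Namespace: "default"}}
	fcore.RegisterPodGroup(pod, groupName, 2)

	// Record the node/task pairs for the group (the plugin does this in AskFlux with
	// the nodelist from the Match response); an empty list just exercises the error path.
	fcore.CreateNodePodsList([]*pb.NodeAlloc{}, groupName)

	// Each pod in the group then draws one task slot from the cache: GetNextNode pops
	// a task from the first node, drops the node when its tasks are exhausted, and
	// deletes the group entry when nothing is left.
	for i := 0; i < 2; i++ {
		node, err := fcore.GetNextNode(groupName)
		if err != nil {
			fmt.Println("no cached allocation left:", err)
			return
		}
		fmt.Printf("pod %d of group %s -> node %s\n", i+1, groupName, node)
	}
}
```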
@@ -80,7 +75,6 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { f := &Fluence{handle: handle, podNameToJobId: make(map[string]uint64)} - klog.Info("Create plugin") ctx := context.TODO() fcore.Init() @@ -91,7 +85,6 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { }) go fluxPodsInformer.Run(ctx.Done()) - klog.Info("Create generic pod informer") scheme := runtime.NewScheme() clientscheme.AddToScheme(scheme) @@ -136,7 +129,7 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { return nil, err } - klog.Info("Fluence start") + klog.Info("Fluence scheduler plugin started") return f, nil } @@ -149,10 +142,10 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { // IMPORTANT: Less sometimes is not called for smaller sizes, not sure why. // To get around this we call it during PreFilter too. func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { - klog.Infof("ordering pods from Coscheduling") + klog.Infof("[Fluence] Ordering pods in Less") // ensure we have a PodGroup no matter what - klog.Infof("ensuring fluence groups") + klog.Infof("[Fluence] Comparing %s and %s", podInfo1.Pod.Name, podInfo2.Pod.Name) podGroup1 := f.ensureFluenceGroup(podInfo1.Pod) podGroup2 := f.ensureFluenceGroup(podInfo2.Pod) @@ -186,21 +179,22 @@ func (f *Fluence) PreFilter( pod *v1.Pod, ) (*framework.PreFilterResult, *framework.Status) { - klog.Infof("Examining the pod") + klog.Infof("[Fluence] Examining pod %s", pod.Name) // groupName will be named according to the single pod namespace / pod if there wasn't // a user defined group. This is a size 1 group we handle equivalently. pg := f.getPodsGroup(pod) - klog.Infof("The group size %d", pg.Size) - klog.Infof("group name is %s", pg.Name) + klog.Infof("[Fluence] Pod %s group size %d", pod.Name, pg.Size) + klog.Infof("[Fluence] Pod %s group name is %s", pod.Name, pg.Name) // Note that it is always the case we have a group // We have not yet derived a node list if !pg.HavePodNodes() { - klog.Infof("Getting a pod group") + klog.Infof("[Fluence] Does not have nodes yet, asking Fluxion") err := f.AskFlux(ctx, pod, int(pg.Size)) if err != nil { + klog.Infof("[Fluence] Fluxion returned an error %s, not schedulable", err.Error()) return nil, framework.NewStatus(framework.Unschedulable, err.Error()) } } @@ -250,17 +244,18 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { f.mutex.Unlock() if isPodAllocated { - klog.Info("Clean up previous allocation") + klog.Info("[Fluence] Pod %s is allocated, cleaning up previous allocation", pod.Name) f.mutex.Lock() f.cancelFluxJobForPod(pod) f.mutex.Unlock() } jobspec := utils.InspectPodInfo(pod) + klog.Infof("[Fluence] Inspect pod info, jobspec: %s", jobspec) conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) if err != nil { - klog.Errorf("[FluxClient] Error connecting to server: %v", err) + klog.Errorf("[Fluence] Error connecting to server: %v", err) return err } defer conn.Close() @@ -279,24 +274,27 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { // otherwise it's going to try to use the allocation (but there is none) r, err := grpcclient.Match(context.Background(), request) if err != nil { - klog.Errorf("[FluxClient] did not receive any match response: %v", err) + klog.Errorf("[Fluence] did not receive any match response: %v", err) return err } - klog.Infof("[FluxClient] response podID %s", r.GetPodID()) + 
klog.Infof("[Fluence] response podID %s", r.GetPodID()) // Presence of a podGroup is indicated by a groupName // Flag that the group is allocated (yes we also have the job id, testing for now) pg := f.getPodsGroup(pod) - nodelist := fcore.CreateNodePodsList(r.GetNodelist(), pg.Name) - klog.Infof("[FluxClient] response nodeID %s", r.GetNodelist()) - klog.Info("[FluxClient] Parsed Nodelist ", nodelist) + // Get the nodelist and inspect + nodes := r.GetNodelist() + klog.Infof("[Fluence] Nodelist returned from Fluxion: %s", nodes) + + nodelist := fcore.CreateNodePodsList(nodes, pg.Name) + klog.Infof("[Fluence] parsed node pods list %s", nodelist) jobid := uint64(r.GetJobID()) f.mutex.Lock() f.podNameToJobId[pod.Name] = jobid - klog.Info("Check job set: ", f.podNameToJobId) + klog.Info("[Fluence] Check job assignment: ", f.podNameToJobId) f.mutex.Unlock() return nil } @@ -305,14 +303,12 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { func (f *Fluence) cancelFluxJobForPod(pod *v1.Pod) error { jobid := f.podNameToJobId[pod.Name] - klog.Infof("Cancel flux job: %v for pod %s", jobid, pod.Name) - - start := time.Now() + klog.Infof("[Fluence] Cancel flux job: %v for pod %s", jobid, pod.Name) conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) if err != nil { - klog.Errorf("[FluxClient] Error connecting to server: %v", err) + klog.Errorf("[Fluence] Error connecting to server: %v", err) return err } defer conn.Close() @@ -321,43 +317,36 @@ func (f *Fluence) cancelFluxJobForPod(pod *v1.Pod) error { _, cancel := context.WithTimeout(context.Background(), 200*time.Second) defer cancel() - request := &pb.CancelRequest{ - JobID: int64(jobid), - } - + // I think this error reflects the success or failure of the cancel request + request := &pb.CancelRequest{JobID: int64(jobid)} res, err := grpcclient.Cancel(context.Background(), request) if err != nil { - klog.Errorf("[FluxClient] did not receive any cancel response: %v", err) + klog.Errorf("[Fluence] did not receive any cancel response: %v", err) return err } + klog.Infof("[Fluence] Job cancellation for pod %s result: %d", pod.Name, res.Error) + // And this error is if the cancel was successful or not if res.Error == 0 { + klog.Infof("[Fluence] Successful cancel of flux job: %v for pod %s", jobid, pod.Name) delete(f.podNameToJobId, pod.Name) - } else { - klog.Warningf("Failed to delete pod %s from the podname-jobid map.", pod.Name) - } - - // If we are successful, clear the group allocated nodes - pg := f.getPodsGroup(pod) - pg.CancelAllocation() - elapsed := metrics.SinceInSeconds(start) - klog.Info("Time elapsed (Cancel Job) :", elapsed) - - klog.Infof("Job cancellation for pod %s result: %d", pod.Name, err) - if klog.V(2).Enabled() { - klog.Info("Check job set: after delete") - klog.Info(f.podNameToJobId) + // If we are successful, clear the group allocated nodes + pg := f.getPodsGroup(pod) + pg.CancelAllocation() + } else { + klog.Warningf("[Fluence] Failed to cancel flux job %v for pod %s", jobid, pod.Name) } return nil } // EventHandlers updatePod handles cleaning up resources func (f *Fluence) updatePod(oldObj, newObj interface{}) { - // klog.Info("Update Pod event handler") + + oldPod := oldObj.(*v1.Pod) newPod := newObj.(*v1.Pod) - klog.Infof("Processing event for pod %s", newPod.Name) + klog.Infof("[Fluence] Processing event for pod %s from %s to %s", newPod.Name, newPod.Status.Phase, oldPod.Status.Phase) switch newPod.Status.Phase { case v1.PodPending: @@ -365,7 +354,7 @@ func (f *Fluence) 
updatePod(oldObj, newObj interface{}) { case v1.PodRunning: // if a pod is start running, we can add it state to the delta graph if it is scheduled by other scheduler case v1.PodSucceeded: - klog.Infof("Pod %s succeeded, Fluence needs to free the resources", newPod.Name) + klog.Infof("[Fluence] Pod %s succeeded, Fluence needs to free the resources", newPod.Name) f.mutex.Lock() defer f.mutex.Unlock() @@ -373,11 +362,11 @@ func (f *Fluence) updatePod(oldObj, newObj interface{}) { if _, ok := f.podNameToJobId[newPod.Name]; ok { f.cancelFluxJobForPod(newPod) } else { - klog.Infof("Succeeded pod %s/%s doesn't have flux jobid", newPod.Namespace, newPod.Name) + klog.Infof("[Fluence] Succeeded pod %s/%s doesn't have flux jobid", newPod.Namespace, newPod.Name) } case v1.PodFailed: // a corner case need to be tested, the pod exit code is not 0, can be created with segmentation fault pi test - klog.Warningf("Pod %s failed, Fluence needs to free the resources", newPod.Name) + klog.Warningf("[Fluence] Pod %s failed, Fluence needs to free the resources", newPod.Name) f.mutex.Lock() defer f.mutex.Unlock() @@ -385,7 +374,7 @@ func (f *Fluence) updatePod(oldObj, newObj interface{}) { if _, ok := f.podNameToJobId[newPod.Name]; ok { f.cancelFluxJobForPod(newPod) } else { - klog.Errorf("Failed pod %s/%s doesn't have flux jobid", newPod.Namespace, newPod.Name) + klog.Errorf("[Fluence] Failed pod %s/%s doesn't have flux jobid", newPod.Namespace, newPod.Name) } case v1.PodUnknown: // don't know how to deal with it as it's unknown phase @@ -400,11 +389,11 @@ func (f *Fluence) deletePod(podObj interface{}) { klog.Info("Delete Pod event handler") pod := podObj.(*v1.Pod) - klog.Info("Pod status: ", pod.Status.Phase) + klog.Infof("[Fluence] Delete pod has status %s", pod.Status.Phase) switch pod.Status.Phase { case v1.PodSucceeded: case v1.PodPending: - klog.Infof("Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) + klog.Infof("[Fluence] Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) f.mutex.Lock() defer f.mutex.Unlock() @@ -412,7 +401,7 @@ func (f *Fluence) deletePod(podObj interface{}) { if _, ok := f.podNameToJobId[pod.Name]; ok { f.cancelFluxJobForPod(pod) } else { - klog.Infof("Terminating pod %s/%s doesn't have flux jobid", pod.Namespace, pod.Name) + klog.Infof("[Fluence] Terminating pod %s/%s doesn't have flux jobid", pod.Namespace, pod.Name) } case v1.PodRunning: f.mutex.Lock() @@ -421,7 +410,7 @@ func (f *Fluence) deletePod(podObj interface{}) { if _, ok := f.podNameToJobId[pod.Name]; ok { f.cancelFluxJobForPod(pod) } else { - klog.Infof("Deleted pod %s/%s doesn't have flux jobid", pod.Namespace, pod.Name) + klog.Infof("[Fluence] Deleted pod %s/%s doesn't have flux jobid", pod.Namespace, pod.Name) } } } diff --git a/sig-scheduler-plugins/pkg/fluence/group.go b/sig-scheduler-plugins/pkg/fluence/group.go index a2597eb..6f55a8b 100644 --- a/sig-scheduler-plugins/pkg/fluence/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group.go @@ -44,10 +44,11 @@ func (f *Fluence) ensureFluenceGroup(pod *v1.Pod) string { // If there isn't a group, make a single node sized group // This is so we can always treat the cases equally if groupName == "" { + klog.Infof(" [Fluence] Group annotation missing for pod %s", pod.Name) groupName = f.getDefaultGroupName(pod) } - klog.Infof("group name for %s is %s", pod.Name, groupName) - klog.Infof("group size for %s is %d", pod.Name, groupSize) + klog.Infof(" [Fluence] Group name for %s is %s", 
pod.Name, groupName) + klog.Infof(" [Fluence] Group size for %s is %d", pod.Name, groupSize) // Register the pod group (with the pod) in our cache fcore.RegisterPodGroup(pod, groupName, groupSize) @@ -81,7 +82,7 @@ func (f *Fluence) getFluenceGroupSize(pod *v1.Pod) int32 { // that doesn't convert nicely. They can find this in the logs. intSize, err := strconv.ParseUint(size, 10, 32) if err != nil { - klog.Error("Parsing integer size for pod group") + klog.Error(" [Fluence] Parsing integer size for pod group") } return int32(intSize) } @@ -93,9 +94,10 @@ func (f *Fluence) getCreationTimestamp(groupName string, podInfo *framework.Queu // IsZero is an indicator if this was actually set // If the group label was present and we have a group, this will be true if !pg.TimeCreated.IsZero() { - klog.Infof("pod group %s was created at %s\n", groupName, pg.TimeCreated) + klog.Infof(" [Fluence] Pod group %s was created at %s\n", groupName, pg.TimeCreated) return pg.TimeCreated } // We should actually never get here. + klog.Errorf(" [Fluence] Pod group %s time IsZero, we should not have reached here", groupName) return metav1.NewMicroTime(*podInfo.InitialAttemptTimestamp) } diff --git a/src/fluence/fluxion/fluxion.go b/src/fluence/fluxion/fluxion.go index 6478602..2a8fd7f 100644 --- a/src/fluence/fluxion/fluxion.go +++ b/src/fluence/fluxion/fluxion.go @@ -96,7 +96,7 @@ func (s *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResp emptyResponse := &pb.MatchResponse{} // Prepare an empty match response (that can still be serialized) - fmt.Printf("[GRPCServer] Received Match request %v\n", in) + fmt.Printf("[Fluence][MatchRPC] Received Match request %v\n", in) // Generate the jobspec, written to temporary file and read as string spec, err := s.generateJobspec(in) @@ -107,18 +107,25 @@ func (s *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResp // Ask flux to match allocate! reserved, allocated, at, overhead, jobid, fluxerr := s.cli.MatchAllocate(false, string(spec)) utils.PrintOutput(reserved, allocated, at, overhead, jobid, fluxerr) - fmt.Printf("[MatchRPC] Errors so far: %s\n", s.cli.GetErrMsg()) + + // Be explicit about errors (or not) + errorMessages := s.cli.GetErrMsg() + if errorMessages == "" { + fmt.Println("[Fluence][MatchRPC] There are no errors") + } else { + fmt.Printf("[Fluence][MatchRPC] Errors so far: %s\n", errorMessages) + } if fluxerr != nil { - fmt.Printf("[GRPCServer] Flux err is %w\n", fluxerr) - return emptyResponse, errors.New("Error in ReapiCliMatchAllocate") + fmt.Printf("[Fluence][MatchRPC] Flux err is %w\n", fluxerr) + return emptyResponse, errors.New("[Fluence] Error in ReapiCliMatchAllocate") } // This usually means we cannot allocate // We need to return an error here otherwise we try to pass an empty string // to other RPC endpoints and get back an error. 
if allocated == "" { - fmt.Println("[GRPCServer] Allocated is empty") - return emptyResponse, errors.New("allocation was not possible") + fmt.Println("[Fluence][MatchRPC] Allocated is empty") + return emptyResponse, errors.New("Allocation was not possible") } // Pass the spec name in so we can include it in the allocation result diff --git a/src/fluence/utils/utils.go b/src/fluence/utils/utils.go index aadcb41..961a77a 100644 --- a/src/fluence/utils/utils.go +++ b/src/fluence/utils/utils.go @@ -206,6 +206,7 @@ type allocation struct { func ParseAllocResult(allocated, podName string) []allocation { var dat map[string]interface{} result := []allocation{} + fmt.Printf("Raw allocated response: %s\n", allocated) // Keep track of total core count across allocated corecount := 0 @@ -214,7 +215,6 @@ func ParseAllocResult(allocated, podName string) []allocation { if err := json.Unmarshal([]byte(allocated), &dat); err != nil { panic(err) } - // Parse graph and nodes into interfaces // TODO look at github.com/mitchellh/mapstructure // that might make this easier @@ -240,12 +240,23 @@ func ParseAllocResult(allocated, podName string) []allocation { corecount = 0 } } - fmt.Printf("Final node result for %s: %s\n", podName, result) + fmt.Printf("Final node result for %s\n", podName) + for i, alloc := range result { + fmt.Printf("Node %d: %s\n", i, alloc.Name) + fmt.Printf(" Type: %s\n Name: %s\n Basename: %s\n CoreCount: %d\n", + alloc.Type, alloc.Name, alloc.Basename, alloc.CoreCount) + + } return result } // Utility functions func PrintOutput(reserved bool, allocated string, at int64, overhead float64, jobid uint64, fluxerr error) { fmt.Println("\n\t----Match Allocate output---") - fmt.Printf("jobid: %d\nreserved: %t\nallocated: %s\nat: %d\noverhead: %f\nerror: %w\n", jobid, reserved, allocated, at, overhead, fluxerr) + fmt.Printf("jobid: %d\nreserved: %t\nallocated: %s\nat: %d\noverhead: %f\n", jobid, reserved, allocated, at, overhead) + + // Only print error if we had one + if fluxerr != nil { + fmt.Printf("error: %w\n", fluxerr) + } } From f8ca47ee63996e7e73bf8bb47cf0fdb1b49e6c0c Mon Sep 17 00:00:00 2001 From: vsoch Date: Mon, 15 Jan 2024 22:29:10 -0700 Subject: [PATCH 06/28] add examples with lammps to reproduce error Signed-off-by: vsoch --- examples/pod-group/lammps/lammps2.yaml | 22 ++++++++++++++++++ examples/pod-group/lammps/lammps4-2.yaml | 22 ++++++++++++++++++ examples/pod-group/lammps/lammps4-3.yaml | 22 ++++++++++++++++++ examples/pod-group/lammps/lammps4.yaml | 23 +++++++++++++++++++ examples/pod-group/lammps/lammps5.yaml | 22 ++++++++++++++++++ examples/pod-group/lammps/lammps6.yaml | 22 ++++++++++++++++++ .../pkg/fluence/core/core.go | 7 ++++++ sig-scheduler-plugins/pkg/fluence/fluence.go | 22 +++++++++--------- sig-scheduler-plugins/pkg/fluence/group.go | 17 +++++++------- 9 files changed, 160 insertions(+), 19 deletions(-) create mode 100644 examples/pod-group/lammps/lammps2.yaml create mode 100644 examples/pod-group/lammps/lammps4-2.yaml create mode 100644 examples/pod-group/lammps/lammps4-3.yaml create mode 100644 examples/pod-group/lammps/lammps4.yaml create mode 100644 examples/pod-group/lammps/lammps5.yaml create mode 100644 examples/pod-group/lammps/lammps6.yaml diff --git a/examples/pod-group/lammps/lammps2.yaml b/examples/pod-group/lammps/lammps2.yaml new file mode 100644 index 0000000..acdd2d5 --- /dev/null +++ b/examples/pod-group/lammps/lammps2.yaml @@ -0,0 +1,22 @@ +apiVersion: flux-framework.org/v1alpha2 +kind: MiniCluster +metadata: + name: lammps2 +spec: + size: 2 + 
network: + headlessName: l2 + pod: + schedulerName: fluence + labels: + fluence.pod-group: lammps2 + fluence.group-size: "2" + containers: + - image: ghcr.io/converged-computing/metric-lammps:latest@sha256:e24a1ba8954f5a0a7a0bd854cfc5ca7f82ca12607dc6ace38d838591b8deb8ed + workingDir: /opt/lammps/examples/reaxff/HNS + command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite + resources: + limits: + cpu: 2 + requests: + cpu: 2 \ No newline at end of file diff --git a/examples/pod-group/lammps/lammps4-2.yaml b/examples/pod-group/lammps/lammps4-2.yaml new file mode 100644 index 0000000..777e73c --- /dev/null +++ b/examples/pod-group/lammps/lammps4-2.yaml @@ -0,0 +1,22 @@ +apiVersion: flux-framework.org/v1alpha2 +kind: MiniCluster +metadata: + name: lammps4-2 +spec: + size: 4 + network: + headlessName: l42 + pod: + schedulerName: fluence + labels: + fluence.pod-group: lammps4-2 + fluence.group-size: "4" + containers: + - image: ghcr.io/converged-computing/metric-lammps:latest@sha256:e24a1ba8954f5a0a7a0bd854cfc5ca7f82ca12607dc6ace38d838591b8deb8ed + workingDir: /opt/lammps/examples/reaxff/HNS + command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite + resources: + limits: + cpu: 2 + requests: + cpu: 2 \ No newline at end of file diff --git a/examples/pod-group/lammps/lammps4-3.yaml b/examples/pod-group/lammps/lammps4-3.yaml new file mode 100644 index 0000000..76c5ed0 --- /dev/null +++ b/examples/pod-group/lammps/lammps4-3.yaml @@ -0,0 +1,22 @@ +apiVersion: flux-framework.org/v1alpha2 +kind: MiniCluster +metadata: + name: lammps4-3 +spec: + size: 4 + network: + headlessName: l43 + pod: + schedulerName: fluence + labels: + fluence.pod-group: lammps4-3 + fluence.group-size: "4" + containers: + - image: ghcr.io/converged-computing/metric-lammps:latest@sha256:e24a1ba8954f5a0a7a0bd854cfc5ca7f82ca12607dc6ace38d838591b8deb8ed + workingDir: /opt/lammps/examples/reaxff/HNS + command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite + resources: + limits: + cpu: 2 + requests: + cpu: 2 \ No newline at end of file diff --git a/examples/pod-group/lammps/lammps4.yaml b/examples/pod-group/lammps/lammps4.yaml new file mode 100644 index 0000000..38ae0a7 --- /dev/null +++ b/examples/pod-group/lammps/lammps4.yaml @@ -0,0 +1,23 @@ +apiVersion: flux-framework.org/v1alpha2 +kind: MiniCluster +metadata: + name: lammps4 +spec: + size: 4 + network: + headlessName: l4 + pod: + schedulerName: fluence + labels: + app: lammps + fluence.pod-group: lammps4 + fluence.group-size: "4" + containers: + - image: ghcr.io/converged-computing/metric-lammps:latest@sha256:e24a1ba8954f5a0a7a0bd854cfc5ca7f82ca12607dc6ace38d838591b8deb8ed + workingDir: /opt/lammps/examples/reaxff/HNS + command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite + resources: + limits: + cpu: 2 + requests: + cpu: 2 \ No newline at end of file diff --git a/examples/pod-group/lammps/lammps5.yaml b/examples/pod-group/lammps/lammps5.yaml new file mode 100644 index 0000000..7546b48 --- /dev/null +++ b/examples/pod-group/lammps/lammps5.yaml @@ -0,0 +1,22 @@ +apiVersion: flux-framework.org/v1alpha2 +kind: MiniCluster +metadata: + name: lammps5 +spec: + size: 5 + network: + headlessName: l5 + pod: + schedulerName: fluence + labels: + fluence.pod-group: lammps5 + fluence.group-size: "5" + containers: + - image: ghcr.io/converged-computing/metric-lammps:latest@sha256:e24a1ba8954f5a0a7a0bd854cfc5ca7f82ca12607dc6ace38d838591b8deb8ed + workingDir: /opt/lammps/examples/reaxff/HNS + command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite + resources: + limits: + 
cpu: 2 + requests: + cpu: 2 \ No newline at end of file diff --git a/examples/pod-group/lammps/lammps6.yaml b/examples/pod-group/lammps/lammps6.yaml new file mode 100644 index 0000000..2030192 --- /dev/null +++ b/examples/pod-group/lammps/lammps6.yaml @@ -0,0 +1,22 @@ +apiVersion: flux-framework.org/v1alpha2 +kind: MiniCluster +metadata: + name: lammps6 +spec: + size: 6 + network: + headlessName: l6 + pod: + schedulerName: fluence + labels: + fluence.pod-group: lammps6 + fluence.group-size: "6" + containers: + - image: ghcr.io/converged-computing/metric-lammps:latest@sha256:e24a1ba8954f5a0a7a0bd854cfc5ca7f82ca12607dc6ace38d838591b8deb8ed + workingDir: /opt/lammps/examples/reaxff/HNS + command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite + resources: + limits: + cpu: 2 + requests: + cpu: 2 \ No newline at end of file diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index ddf8e4c..135659f 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -126,6 +126,13 @@ func DeletePodGroup(groupName string) { delete(podGroupCache, groupName) } +// ListGroups lists groups, primarily for debugging +func ListGroups() { + for name, pg := range podGroupCache { + fmt.Printf(" %s: size %s, created at %s\n", name, pg.Size, &pg.TimeCreated) + } +} + // CreateNodePodsList creates a list of node pod caches func CreateNodePodsList(nodelist []*pb.NodeAlloc, groupName string) (nodepods []NodeCache) { diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index bbe6cee..6752764 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -244,18 +244,18 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { f.mutex.Unlock() if isPodAllocated { - klog.Info("[Fluence] Pod %s is allocated, cleaning up previous allocation", pod.Name) + klog.Infof("[Fluence] Pod %s is allocated, cleaning up previous allocation\n", pod.Name) f.mutex.Lock() f.cancelFluxJobForPod(pod) f.mutex.Unlock() } jobspec := utils.InspectPodInfo(pod) - klog.Infof("[Fluence] Inspect pod info, jobspec: %s", jobspec) + klog.Infof("[Fluence] Inspect pod info, jobspec: %s\n", jobspec) conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) if err != nil { - klog.Errorf("[Fluence] Error connecting to server: %v", err) + klog.Errorf("[Fluence] Error connecting to server: %v\n", err) return err } defer conn.Close() @@ -274,11 +274,11 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { // otherwise it's going to try to use the allocation (but there is none) r, err := grpcclient.Match(context.Background(), request) if err != nil { - klog.Errorf("[Fluence] did not receive any match response: %v", err) + klog.Errorf("[Fluence] did not receive any match response: %v\n", err) return err } - klog.Infof("[Fluence] response podID %s", r.GetPodID()) + klog.Infof("[Fluence] response podID %s\n", r.GetPodID()) // Presence of a podGroup is indicated by a groupName // Flag that the group is allocated (yes we also have the job id, testing for now) @@ -286,20 +286,21 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { // Get the nodelist and inspect nodes := r.GetNodelist() - klog.Infof("[Fluence] Nodelist returned from Fluxion: %s", nodes) + klog.Infof("[Fluence] Nodelist returned from Fluxion: %s\n", nodes) nodelist := fcore.CreateNodePodsList(nodes, 
pg.Name) - klog.Infof("[Fluence] parsed node pods list %s", nodelist) + klog.Infof("[Fluence] parsed node pods list %s\n", nodelist) jobid := uint64(r.GetJobID()) f.mutex.Lock() f.podNameToJobId[pod.Name] = jobid - klog.Info("[Fluence] Check job assignment: ", f.podNameToJobId) + klog.Infof("[Fluence] Check job assignment: %s\n", f.podNameToJobId) f.mutex.Unlock() return nil } // cancelFluxJobForPod cancels the flux job for a pod. +// We assume that the cancelled job also means deleting the pod group func (f *Fluence) cancelFluxJobForPod(pod *v1.Pod) error { jobid := f.podNameToJobId[pod.Name] @@ -332,8 +333,7 @@ func (f *Fluence) cancelFluxJobForPod(pod *v1.Pod) error { delete(f.podNameToJobId, pod.Name) // If we are successful, clear the group allocated nodes - pg := f.getPodsGroup(pod) - pg.CancelAllocation() + f.DeleteFluenceGroup(pod) } else { klog.Warningf("[Fluence] Failed to cancel flux job %v for pod %s", jobid, pod.Name) } @@ -386,7 +386,7 @@ func (f *Fluence) updatePod(oldObj, newObj interface{}) { // deletePod handles the delete event handler // TODO when should we clear group from the cache? func (f *Fluence) deletePod(podObj interface{}) { - klog.Info("Delete Pod event handler") + klog.Info("[Fluence] Delete Pod event handler") pod := podObj.(*v1.Pod) klog.Infof("[Fluence] Delete pod has status %s", pod.Status.Phase) diff --git a/sig-scheduler-plugins/pkg/fluence/group.go b/sig-scheduler-plugins/pkg/fluence/group.go index 6f55a8b..f2cdf21 100644 --- a/sig-scheduler-plugins/pkg/fluence/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group.go @@ -44,11 +44,11 @@ func (f *Fluence) ensureFluenceGroup(pod *v1.Pod) string { // If there isn't a group, make a single node sized group // This is so we can always treat the cases equally if groupName == "" { - klog.Infof(" [Fluence] Group annotation missing for pod %s", pod.Name) + klog.Infof(" [Fluence] Group annotation missing for pod %s", pod.Name) groupName = f.getDefaultGroupName(pod) } - klog.Infof(" [Fluence] Group name for %s is %s", pod.Name, groupName) - klog.Infof(" [Fluence] Group size for %s is %d", pod.Name, groupSize) + klog.Infof(" [Fluence] Group name for %s is %s", pod.Name, groupName) + klog.Infof(" [Fluence] Group size for %s is %d", pod.Name, groupSize) // Register the pod group (with the pod) in our cache fcore.RegisterPodGroup(pod, groupName, groupSize) @@ -56,11 +56,12 @@ func (f *Fluence) ensureFluenceGroup(pod *v1.Pod) string { } // deleteFluenceGroup ensures the pod group is deleted, if it exists -func (f *Fluence) deleteFluenceGroup(pod *v1.Pod) { - +func (f *Fluence) DeleteFluenceGroup(pod *v1.Pod) { // Get the group name and size from the fluence labels pg := f.getPodsGroup(pod) fcore.DeletePodGroup(pg.Name) + klog.Infof(" [Fluence] known groups are:\n") + fcore.ListGroups() } // getFluenceGroupName looks for the group to indicate a fluence group, and returns it @@ -82,7 +83,7 @@ func (f *Fluence) getFluenceGroupSize(pod *v1.Pod) int32 { // that doesn't convert nicely. They can find this in the logs. 
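	// For example, fluence.group-size: "4" parses to 4 here, while an unparseable
	// value logs the error below and falls through to a size of 0.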
intSize, err := strconv.ParseUint(size, 10, 32) if err != nil { - klog.Error(" [Fluence] Parsing integer size for pod group") + klog.Error(" [Fluence] Parsing integer size for pod group") } return int32(intSize) } @@ -94,10 +95,10 @@ func (f *Fluence) getCreationTimestamp(groupName string, podInfo *framework.Queu // IsZero is an indicator if this was actually set // If the group label was present and we have a group, this will be true if !pg.TimeCreated.IsZero() { - klog.Infof(" [Fluence] Pod group %s was created at %s\n", groupName, pg.TimeCreated) + klog.Infof(" [Fluence] Pod group %s was created at %s\n", groupName, pg.TimeCreated) return pg.TimeCreated } // We should actually never get here. - klog.Errorf(" [Fluence] Pod group %s time IsZero, we should not have reached here", groupName) + klog.Errorf(" [Fluence] Pod group %s time IsZero, we should not have reached here", groupName) return metav1.NewMicroTime(*podInfo.InitialAttemptTimestamp) } From 275cd04414ebbda845fa26094171556c9e220fdc Mon Sep 17 00:00:00 2001 From: vsoch Date: Thu, 18 Jan 2024 16:16:32 -0700 Subject: [PATCH 07/28] clean up logging and unused files We install with the helm manifests, and the old fluence manifests might be confusing (they have changed). This commit will remove the old manifests, and also change some of the fmt.Print logging to use klog to be easier to parse. Signed-off-by: vsoch --- Makefile | 1 - .../manifests/fluence/configmap.yaml | 23 ------ .../manifests/fluence/deploy.yaml | 45 ---------- .../manifests/fluence/rbac.yaml | 82 ------------------- .../scheduling.sigs.k8s.io_podgroups.yaml | 1 - .../manifests/fluence/serviceaccount.yaml | 10 --- .../pkg/fluence/core/core.go | 23 +++--- sig-scheduler-plugins/pkg/fluence/fluence.go | 16 +++- sig-scheduler-plugins/pkg/fluence/group.go | 8 +- .../pkg/fluence/utils/utils.go | 5 +- src/fluence/fluxion/fluxion.go | 33 ++++---- src/fluence/utils/utils.go | 1 - 12 files changed, 47 insertions(+), 201 deletions(-) delete mode 100644 sig-scheduler-plugins/manifests/fluence/configmap.yaml delete mode 100644 sig-scheduler-plugins/manifests/fluence/deploy.yaml delete mode 100644 sig-scheduler-plugins/manifests/fluence/rbac.yaml delete mode 120000 sig-scheduler-plugins/manifests/fluence/scheduling.sigs.k8s.io_podgroups.yaml delete mode 100644 sig-scheduler-plugins/manifests/fluence/serviceaccount.yaml diff --git a/Makefile b/Makefile index 907f96e..1356160 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,6 @@ prepare: clone rm -rf $(CLONE_UPSTREAM)/pkg/fluence rm -rf $(CLONE_UPSTREAM)/manifests/fluence cp -R sig-scheduler-plugins/pkg/fluence $(CLONE_UPSTREAM)/pkg/fluence - cp -R sig-scheduler-plugins/manifests/fluence $(CLONE_UPSTREAM)/manifests/fluence # This is the one exception not from sig-scheduler-plugins because it is needed in both spots cp -R src/fluence/fluxcli-grpc $(CLONE_UPSTREAM)/pkg/fluence/fluxcli-grpc # These are files with subtle changes to add fluence diff --git a/sig-scheduler-plugins/manifests/fluence/configmap.yaml b/sig-scheduler-plugins/manifests/fluence/configmap.yaml deleted file mode 100644 index 21ffacc..0000000 --- a/sig-scheduler-plugins/manifests/fluence/configmap.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: scheduler-config - namespace: scheduler-plugins -data: - scheduler-config.yaml: | - apiVersion: kubescheduler.config.k8s.io/v1beta3 - kind: KubeSchedulerConfiguration - leaderElection: - leaderElect: false - profiles: - - schedulerName: fluence - plugins: - preFilter: - enabled: - - 
name: Fluence - filter: - enabled: - - name: Fluence - score: - disabled: - - name: '*' \ No newline at end of file diff --git a/sig-scheduler-plugins/manifests/fluence/deploy.yaml b/sig-scheduler-plugins/manifests/fluence/deploy.yaml deleted file mode 100644 index 92e39b0..0000000 --- a/sig-scheduler-plugins/manifests/fluence/deploy.yaml +++ /dev/null @@ -1,45 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: fluence - namespace: scheduler-plugins -spec: - replicas: 1 - selector: - matchLabels: - component: scheduler - template: - metadata: - labels: - component: scheduler - spec: - serviceAccountName: scheduler-plugins - containers: - - image: quay.io/cmisale1/fluence-sidecar:latest - imagePullPolicy: Always - command: - - /go/src/fluence/bin/server - - --policy=lonode - name: fluence-sidecar - - image: quay.io/cmisale1/fluence:dev - imagePullPolicy: Always - command: - - /bin/kube-scheduler - - --config=/etc/kubernetes/scheduler-config.yaml - - -v=9 - name: fluence - resources: - requests: - cpu: '0.1' - securityContext: - privileged: false - volumeMounts: - - mountPath: /etc/kubernetes - name: scheduler-config - hostNetwork: false - hostPID: false - volumes: - - name: scheduler-config - configMap: - name: scheduler-config - diff --git a/sig-scheduler-plugins/manifests/fluence/rbac.yaml b/sig-scheduler-plugins/manifests/fluence/rbac.yaml deleted file mode 100644 index 3416e18..0000000 --- a/sig-scheduler-plugins/manifests/fluence/rbac.yaml +++ /dev/null @@ -1,82 +0,0 @@ -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: scheduler-plugins -rules: -- apiGroups: [""] - resources: ["namespaces", "configmaps"] - verbs: ["get", "list", "watch"] -- apiGroups: ["", "events.k8s.io"] - resources: ["events"] - verbs: ["create", "patch", "update"] -- apiGroups: ["coordination.k8s.io"] - resources: ["leases"] - verbs: ["create"] -- apiGroups: ["coordination.k8s.io"] - resourceNames: ["kube-scheduler"] - resources: ["leases"] - verbs: ["get", "update"] -- apiGroups: [""] - resources: ["endpoints"] - verbs: ["create"] -- apiGroups: [""] - resourceNames: ["kube-scheduler"] - resources: ["endpoints"] - verbs: ["get", "update"] -- apiGroups: [""] - resources: ["nodes"] - verbs: ["get", "list", "watch", "patch"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["delete", "get", "list", "watch", "update"] -- apiGroups: [""] - resources: ["bindings", "pods/binding"] - verbs: ["create"] -- apiGroups: [""] - resources: ["pods/status"] - verbs: ["patch", "update"] -- apiGroups: [""] - resources: ["replicationcontrollers", "services"] - verbs: ["get", "list", "watch"] -- apiGroups: ["apps", "extensions"] - resources: ["replicasets"] - verbs: ["get", "list", "watch"] -- apiGroups: ["apps"] - resources: ["statefulsets"] - verbs: ["get", "list", "watch"] -- apiGroups: ["policy"] - resources: ["poddisruptionbudgets"] - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: ["persistentvolumeclaims", "persistentvolumes"] - verbs: ["get", "list", "watch", "patch", "update"] -- apiGroups: ["authentication.k8s.io"] - resources: ["tokenreviews"] - verbs: ["create"] -- apiGroups: ["authorization.k8s.io"] - resources: ["subjectaccessreviews"] - verbs: ["create"] -- apiGroups: ["storage.k8s.io"] - resources: ["csinodes", "storageclasses" , "csidrivers" , "csistoragecapacities"] - verbs: ["get", "list", "watch"] -- apiGroups: ["topology.node.k8s.io"] - resources: ["noderesourcetopologies"] - verbs: ["*"] -# resources need to be updated with the scheduler plugins used -- 
apiGroups: ["scheduling.sigs.k8s.io"] - resources: ["podgroups", "elasticquotas"] - verbs: ["get", "list", "watch", "create", "delete", "update", "patch"] ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: scheduler-plugins - namespace: scheduler-plugins -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: scheduler-plugins -subjects: - - kind: ServiceAccount - name: scheduler-plugins - namespace: scheduler-plugins diff --git a/sig-scheduler-plugins/manifests/fluence/scheduling.sigs.k8s.io_podgroups.yaml b/sig-scheduler-plugins/manifests/fluence/scheduling.sigs.k8s.io_podgroups.yaml deleted file mode 120000 index 7f8408e..0000000 --- a/sig-scheduler-plugins/manifests/fluence/scheduling.sigs.k8s.io_podgroups.yaml +++ /dev/null @@ -1 +0,0 @@ -../coscheduling/crd.yaml \ No newline at end of file diff --git a/sig-scheduler-plugins/manifests/fluence/serviceaccount.yaml b/sig-scheduler-plugins/manifests/fluence/serviceaccount.yaml deleted file mode 100644 index fface49..0000000 --- a/sig-scheduler-plugins/manifests/fluence/serviceaccount.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: scheduler-plugins ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: scheduler-plugins - namespace: scheduler-plugins \ No newline at end of file diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index 135659f..53a627e 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -6,6 +6,7 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" @@ -102,13 +103,13 @@ func RegisterPodGroup(pod *v1.Pod, groupName string, groupSize int32) error { } // Tell the user when it was created - fmt.Printf("[Fluence] Pod group %s was created at %s\n", entry.Name, entry.TimeCreated) + klog.Infof("[Fluence] Pod group %s was created at %s\n", entry.Name, entry.TimeCreated) } // If the size has changed, we currently do not allow updating it. // We issue a warning. In the future this could be supported with a grow command. 
if entry.Size != groupSize { - fmt.Printf("[Fluence] Pod group %s request to change size from %s to %s is not yet supported\n", groupName, entry.Size, groupSize) + klog.Infof("[Fluence] Pod group %s request to change size from %s to %s is not yet supported\n", groupName, entry.Size, groupSize) // entry.GroupSize = groupSize } podGroupCache[groupName] = entry @@ -148,7 +149,7 @@ func CreateNodePodsList(nodelist []*pb.NodeAlloc, groupName string) (nodepods [] // Update the pods in the PodGraphCache updatePodGroupNodes(groupName, nodepods) - fmt.Printf("[Fluence] Pod group cache updated with nodes\n", podGroupCache) + klog.Infof("[Fluence] Pod group cache updated with nodes\n", podGroupCache) return nodepods } @@ -175,30 +176,28 @@ func (p *PodGroupCache) CancelAllocation() { func GetNextNode(groupName string) (string, error) { entry, ok := podGroupCache[groupName] if !ok { - err := fmt.Errorf("[Fluence] Map is empty\n") - return "", err + return "", fmt.Errorf("[Fluence] Map is empty\n") } if len(entry.Nodes) == 0 { - err := fmt.Errorf("[Fluence] Error while getting a node\n") - return "", err + return "", fmt.Errorf("[Fluence] Error while getting a node\n") } nodename := entry.Nodes[0].NodeName - fmt.Printf("[Fluence] Next node for group %s is %s", groupName, nodename) + klog.Infof("[Fluence] Next node for group %s is %s", groupName, nodename) if entry.Nodes[0].Tasks == 1 { - fmt.Println("[Fluence] First node has one task") + klog.Infof("[Fluence] First node has one task") slice := entry.Nodes[1:] if len(slice) == 0 { - fmt.Printf("[Fluence] After this node, the slice is empty, deleting group %s from cache\n", groupName) + klog.Infof("[Fluence] After this node, the slice is empty, deleting group %s from cache\n", groupName) delete(podGroupCache, groupName) return nodename, nil } - fmt.Println("[Fluence] After this node, the slide still has nodes") + klog.Infof("[Fluence] After this node, the slide still has nodes") updatePodGroupNodes(groupName, slice) return nodename, nil } - fmt.Println("[Fluence] Subtracting one task from first node") + klog.Infof("[Fluence] Subtracting one task from first node") entry.Nodes[0].Tasks = entry.Nodes[0].Tasks - 1 return nodename, nil } diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 6752764..145dfb0 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -44,9 +44,14 @@ import ( ) type Fluence struct { - mutex sync.Mutex - handle framework.Handle - client client.Client + mutex sync.Mutex + handle framework.Handle + client client.Client + + // Important: I tested moving this into the group, but it's a bad idea because + // we need to delete the group after the last allocation is given, and then we + // no longer have the ID. It might be a better approach to delete it elsewhere + // (but I'm not sure where that elsewhere could be) podNameToJobId map[string]uint64 pgMgr coschedulingcore.Manager } @@ -250,6 +255,7 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { f.mutex.Unlock() } + // Does the task name here matter? 
We are naming the entire group for the pod jobspec := utils.InspectPodInfo(pod) klog.Infof("[Fluence] Inspect pod info, jobspec: %s\n", jobspec) conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) @@ -413,4 +419,8 @@ func (f *Fluence) deletePod(podObj interface{}) { klog.Infof("[Fluence] Deleted pod %s/%s doesn't have flux jobid", pod.Namespace, pod.Name) } } + + // We assume that a request to delete one pod means all of them. + // We have to take an all or nothing approach for now + f.DeleteFluenceGroup(pod) } diff --git a/sig-scheduler-plugins/pkg/fluence/group.go b/sig-scheduler-plugins/pkg/fluence/group.go index f2cdf21..84641f1 100644 --- a/sig-scheduler-plugins/pkg/fluence/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group.go @@ -44,11 +44,11 @@ func (f *Fluence) ensureFluenceGroup(pod *v1.Pod) string { // If there isn't a group, make a single node sized group // This is so we can always treat the cases equally if groupName == "" { - klog.Infof(" [Fluence] Group annotation missing for pod %s", pod.Name) + klog.Infof("[Fluence] Group annotation missing for pod %s", pod.Name) groupName = f.getDefaultGroupName(pod) } - klog.Infof(" [Fluence] Group name for %s is %s", pod.Name, groupName) - klog.Infof(" [Fluence] Group size for %s is %d", pod.Name, groupSize) + klog.Infof("[Fluence] Group name for %s is %s", pod.Name, groupName) + klog.Infof("[Fluence] Group size for %s is %d", pod.Name, groupSize) // Register the pod group (with the pod) in our cache fcore.RegisterPodGroup(pod, groupName, groupSize) @@ -60,7 +60,7 @@ func (f *Fluence) DeleteFluenceGroup(pod *v1.Pod) { // Get the group name and size from the fluence labels pg := f.getPodsGroup(pod) fcore.DeletePodGroup(pg.Name) - klog.Infof(" [Fluence] known groups are:\n") + klog.Infof("[Fluence] known groups are:\n") fcore.ListGroups() } diff --git a/sig-scheduler-plugins/pkg/fluence/utils/utils.go b/sig-scheduler-plugins/pkg/fluence/utils/utils.go index 53e9c4a..e384669 100644 --- a/sig-scheduler-plugins/pkg/fluence/utils/utils.go +++ b/sig-scheduler-plugins/pkg/fluence/utils/utils.go @@ -52,8 +52,9 @@ func InspectPodInfo(pod *v1.Pod) *pb.PodSpec { ps.Labels = getPodJobspecLabels(pod) // Note that Container gets use for the JobSpec, so we provide - // the pod name (to be associated with tasks) for it. We likely - // should change this identifier eventually. + // the pod name (to be associated with tasks) for it. We are making + // the assumption that this one container represents the group, + // which is OK for now, but might not always be true! 
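	// For example, a pod named lammps4-0 in namespace default yields the
	// container identifier default-lammps4-0 here.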
ps.Container = fmt.Sprintf("%s-%s", pod.Namespace, pod.Name) // Create accumulated requests for cpu and limits diff --git a/src/fluence/fluxion/fluxion.go b/src/fluence/fluxion/fluxion.go index 2a8fd7f..18d6735 100644 --- a/src/fluence/fluxion/fluxion.go +++ b/src/fluence/fluxion/fluxion.go @@ -7,10 +7,10 @@ import ( "github.com/flux-framework/flux-k8s/flux-plugin/fluence/jobspec" "github.com/flux-framework/flux-k8s/flux-plugin/fluence/utils" "github.com/flux-framework/flux-sched/resource/reapi/bindings/go/src/fluxcli" + "k8s.io/klog/v2" "context" "errors" - "fmt" ) type Fluxion struct { @@ -22,8 +22,7 @@ type Fluxion struct { func (f *Fluxion) InitFluxion(policy *string, label *string) { f.cli = fluxcli.NewReapiClient() - fmt.Println("Created flux resource client ", f.cli) - fmt.Printf("%+v\n", f.cli) + klog.Infof("[Fluence] Created flux resource client ", f.cli) filename := "/home/data/jgf/kubecluster.json" err := utils.CreateJGF(filename, label) if err != nil { @@ -32,14 +31,14 @@ func (f *Fluxion) InitFluxion(policy *string, label *string) { jgf, err := os.ReadFile(filename) if err != nil { - fmt.Println("Error reading JGF") + klog.Error("Error reading JGF") return } p := "{}" if *policy != "" { p = string("{\"matcher_policy\": \"" + *policy + "\"}") - fmt.Println("Match policy: ", p) + klog.Infof("[Fluence] match policy: ", p) } f.cli.InitContext(string(jgf), p) @@ -48,7 +47,7 @@ func (f *Fluxion) InitFluxion(policy *string, label *string) { // Cancel wraps the Cancel function of the fluxion go bindings func (s *Fluxion) Cancel(ctx context.Context, in *pb.CancelRequest) (*pb.CancelResponse, error) { - fmt.Printf("[GRPCServer] Received Cancel request %v\n", in) + klog.Infof("[Fluence] received cancel request %v\n", in) err := s.cli.Cancel(int64(in.JobID), true) if err != nil { return nil, errors.New("Error in Cancel") @@ -57,14 +56,14 @@ func (s *Fluxion) Cancel(ctx context.Context, in *pb.CancelRequest) (*pb.CancelR // Why would we have an error code here if we check above? 
// This (I think) should be an error code for the specific job dr := &pb.CancelResponse{JobID: in.JobID} - fmt.Printf("[GRPCServer] Sending Cancel response %v\n", dr) - fmt.Printf("[CancelRPC] Errors so far: %s\n", s.cli.GetErrMsg()) + klog.Infof("[Fluence] sending cancel response %v\n", dr) + klog.Infof("[Fluence] cancel errors so far: %s\n", s.cli.GetErrMsg()) reserved, at, overhead, mode, fluxerr := s.cli.Info(int64(in.JobID)) - fmt.Println("\n\t----Job Info output---") - fmt.Printf("jobid: %d\nreserved: %t\nat: %d\noverhead: %f\nmode: %s\nerror: %d\n", in.JobID, reserved, at, overhead, mode, fluxerr) + klog.Infof("\n\t----Job Info output---") + klog.Infof("jobid: %d\nreserved: %t\nat: %d\noverhead: %f\nmode: %s\nerror: %d\n", in.JobID, reserved, at, overhead, mode, fluxerr) - fmt.Printf("[GRPCServer] Sending Cancel response %v\n", dr) + klog.Infof("[GRPCServer] Sending Cancel response %v\n", dr) return dr, nil } @@ -96,7 +95,7 @@ func (s *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResp emptyResponse := &pb.MatchResponse{} // Prepare an empty match response (that can still be serialized) - fmt.Printf("[Fluence][MatchRPC] Received Match request %v\n", in) + klog.Infof("[Fluence] Received Match request %v\n", in) // Generate the jobspec, written to temporary file and read as string spec, err := s.generateJobspec(in) @@ -111,12 +110,12 @@ func (s *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResp // Be explicit about errors (or not) errorMessages := s.cli.GetErrMsg() if errorMessages == "" { - fmt.Println("[Fluence][MatchRPC] There are no errors") + klog.Infof("[Fluence] There are no errors") } else { - fmt.Printf("[Fluence][MatchRPC] Errors so far: %s\n", errorMessages) + klog.Infof("[Fluence] Match errors so far: %s\n", errorMessages) } if fluxerr != nil { - fmt.Printf("[Fluence][MatchRPC] Flux err is %w\n", fluxerr) + klog.Infof("[Fluence] Match Flux err is %w\n", fluxerr) return emptyResponse, errors.New("[Fluence] Error in ReapiCliMatchAllocate") } @@ -124,7 +123,7 @@ func (s *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResp // We need to return an error here otherwise we try to pass an empty string // to other RPC endpoints and get back an error. 
if allocated == "" { - fmt.Println("[Fluence][MatchRPC] Allocated is empty") + klog.Infof("[Fluence] Allocated is empty") return emptyResponse, errors.New("Allocation was not possible") } @@ -139,6 +138,6 @@ func (s *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResp } } mr := &pb.MatchResponse{PodID: in.Ps.Id, Nodelist: nodetaskslist, JobID: int64(jobid)} - fmt.Printf("[GRPCServer] Response %v \n", mr) + klog.Infof("[Fluence] Match response %v \n", mr) return mr, nil } diff --git a/src/fluence/utils/utils.go b/src/fluence/utils/utils.go index 961a77a..f30eeda 100644 --- a/src/fluence/utils/utils.go +++ b/src/fluence/utils/utils.go @@ -206,7 +206,6 @@ type allocation struct { func ParseAllocResult(allocated, podName string) []allocation { var dat map[string]interface{} result := []allocation{} - fmt.Printf("Raw allocated response: %s\n", allocated) // Keep track of total core count across allocated corecount := 0 From f243852e3cbfe400d49d89edeaf7d1d921a35cb1 Mon Sep 17 00:00:00 2001 From: vsoch Date: Thu, 18 Jan 2024 19:36:56 -0700 Subject: [PATCH 08/28] support for skeleton grpc server and service/ingress for external client This adds a prototype support for an extra helm flag that dually enables adding an extra grpc set of endpoints, and then the configs (ingress and service) necessary to expose them. I next need to figure out how to interact with grpc from a local client, likely built from the same codebase and grpc spec. This is super cool!! Signed-off-by: vsoch --- README.md | 39 +- .../templates/deployment.yaml | 36 ++ .../charts/as-a-second-scheduler/values.yaml | 6 + sig-scheduler-plugins/pkg/fluence/fluence.go | 17 +- .../pkg/fluence/{ => group}/group.go | 33 +- src/Makefile | 1 + src/fluence/cmd/main.go | 47 ++- src/fluence/cmd/main.go.bk | 15 - src/fluence/defaults/defaults.go | 5 + src/fluence/fluxcli-grpc/fluxcli.proto | 5 +- src/fluence/fluxion/fluxion.go | 10 +- src/fluence/service-grpc/service.pb.go | 351 ++++++++++++++++++ src/fluence/service-grpc/service.proto | 34 ++ src/fluence/service-grpc/service_grpc.pb.go | 181 +++++++++ src/fluence/service/service.go | 61 +++ 15 files changed, 785 insertions(+), 56 deletions(-) rename sig-scheduler-plugins/pkg/fluence/{ => group}/group.go (79%) delete mode 100644 src/fluence/cmd/main.go.bk create mode 100644 src/fluence/defaults/defaults.go create mode 100644 src/fluence/service-grpc/service.pb.go create mode 100644 src/fluence/service-grpc/service.proto create mode 100644 src/fluence/service-grpc/service_grpc.pb.go create mode 100644 src/fluence/service/service.go diff --git a/README.md b/README.md index 4431050..0433799 100644 --- a/README.md +++ b/README.md @@ -521,7 +521,7 @@ make build REGISTRY=ghcr.io/vsoch And then install with your custom images: -``` +```bash cd ./upstream/manifests/install/charts helm install \ --set scheduler.image=ghcr.io/vsoch/fluence:latest \ @@ -532,6 +532,43 @@ helm install \ And then apply what you need to test, and look at logs! And then keep doing that until you get what you want :) Note that I haven't found a good way for the VSCode developer tools to work because we develop fluence outside of the tree it's supposed to be in. 
+##### kubectl plugin + +Note that if you want to enable extra endpoints for the fluence kubectl plugin and expose the GRPC as a service, you can do: + +```bash +helm install \ + --set scheduler.image=ghcr.io/vsoch/fluence:latest \ + --set scheduler.enableExternalService=true \ + --set scheduler.sidecarimage=ghcr.io/vsoch/fluence-sidecar:latest \ + schedscheduler-plugins as-a-second-scheduler/ +``` + +For this setup if you are developing locally with kind, you will need to enable the ingress. Here is `kind-config.yaml` + +```yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane + kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + extraPortMappings: + - containerPort: 4242 + hostPort: 4242 + protocol: TCP +``` + +And to create: + +```bash +kind create cluster --config ./kind-config.yaml +``` + #### Components - [FluxStateData](sig-scheduler-plugins/pkg/fluence/core/core.go): is given to the [framework.CycleState](https://github.com/kubernetes/kubernetes/blob/242b41b36a20032f99e8a059ca0a5d764105217b/pkg/scheduler/framework/cycle_state.go#L48) and serves as a vehicle to store a cache of node name assignment. diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml index ffc3ce7..83ecccc 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml @@ -37,6 +37,7 @@ spec: metadata: labels: component: scheduler + app: fluence-scheduler spec: serviceAccountName: {{ .Values.scheduler.name }} containers: @@ -45,7 +46,13 @@ spec: command: - /go/src/fluence/bin/server - --policy={{ .Values.scheduler.policy }} + - --port={{ .Values.scheduler.port }} + {{ if .Values.scheduler.enableExternalService }}- --external-service{{ end }} name: sidecar + # These are exposed for the kubectl plugin + {{ if .Values.scheduler.enableExternalService }}ports: + - containerPort: {{ .Values.scheduler.port }} + hostPort: {{ .Values.scheduler.port }}{{ end }} - command: - /bin/kube-scheduler - --config=/etc/kubernetes/scheduler-config.yaml @@ -79,3 +86,32 @@ spec: - name: scheduler-config configMap: name: scheduler-config +{{ if .Values.scheduler.enableExternalService }}--- +apiVersion: v1 +kind: Service +metadata: + name: fluence-service +spec: + type: NodePort + selector: + app: fluence-scheduler + ports: + - port: {{ .Values.scheduler.port }} + targetPort: {{ .Values.scheduler.port }} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: fluence-ingress +spec: + rules: + - host: localhost + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: fluence-service + port: + number: {{ .Values.scheduler.port }}{{ end }} \ No newline at end of file diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml index 38da251..2a35a3a 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml @@ -13,6 +13,12 @@ scheduler: sidecarPullPolicy: Always loggingLevel: "9" + # Port is for GRPC, and enabling the external service will also + # create the service 
and ingress to it, along with adding + # additional API endpoints for our TBA kubectl plugin + enableExternalService: false + port: 4242 + controller: name: scheduler-plugins-controller image: registry.k8s.io/scheduler-plugins/controller:v0.27.8 diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 145dfb0..26282e5 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -40,6 +40,7 @@ import ( coschedulingcore "sigs.k8s.io/scheduler-plugins/pkg/coscheduling/core" fcore "sigs.k8s.io/scheduler-plugins/pkg/fluence/core" pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" + fgroup "sigs.k8s.io/scheduler-plugins/pkg/fluence/group" "sigs.k8s.io/scheduler-plugins/pkg/fluence/utils" ) @@ -151,8 +152,8 @@ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { // ensure we have a PodGroup no matter what klog.Infof("[Fluence] Comparing %s and %s", podInfo1.Pod.Name, podInfo2.Pod.Name) - podGroup1 := f.ensureFluenceGroup(podInfo1.Pod) - podGroup2 := f.ensureFluenceGroup(podInfo2.Pod) + podGroup1 := fgroup.EnsureFluenceGroup(podInfo1.Pod) + podGroup2 := fgroup.EnsureFluenceGroup(podInfo2.Pod) // First preference to priority, but only if they are different prio1 := corev1helpers.PodPriority(podInfo1.Pod) @@ -166,8 +167,8 @@ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { // Fluence can only compare if we have two known groups. // This tries for that first, and falls back to the initial attempt timestamp - creationTime1 := f.getCreationTimestamp(podGroup1, podInfo1) - creationTime2 := f.getCreationTimestamp(podGroup2, podInfo2) + creationTime1 := fgroup.GetCreationTimestamp(podGroup1, podInfo1) + creationTime2 := fgroup.GetCreationTimestamp(podGroup2, podInfo2) // If they are the same, fall back to sorting by name. if creationTime1.Equal(&creationTime2) { @@ -188,7 +189,7 @@ func (f *Fluence) PreFilter( // groupName will be named according to the single pod namespace / pod if there wasn't // a user defined group. This is a size 1 group we handle equivalently. - pg := f.getPodsGroup(pod) + pg := fgroup.GetPodsGroup(pod) klog.Infof("[Fluence] Pod %s group size %d", pod.Name, pg.Size) klog.Infof("[Fluence] Pod %s group name is %s", pod.Name, pg.Name) @@ -288,7 +289,7 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { // Presence of a podGroup is indicated by a groupName // Flag that the group is allocated (yes we also have the job id, testing for now) - pg := f.getPodsGroup(pod) + pg := fgroup.GetPodsGroup(pod) // Get the nodelist and inspect nodes := r.GetNodelist() @@ -339,7 +340,7 @@ func (f *Fluence) cancelFluxJobForPod(pod *v1.Pod) error { delete(f.podNameToJobId, pod.Name) // If we are successful, clear the group allocated nodes - f.DeleteFluenceGroup(pod) + fgroup.DeleteFluenceGroup(pod) } else { klog.Warningf("[Fluence] Failed to cancel flux job %v for pod %s", jobid, pod.Name) } @@ -422,5 +423,5 @@ func (f *Fluence) deletePod(podObj interface{}) { // We assume that a request to delete one pod means all of them. 
// We have to take an all or nothing approach for now - f.DeleteFluenceGroup(pod) + fgroup.DeleteFluenceGroup(pod) } diff --git a/sig-scheduler-plugins/pkg/fluence/group.go b/sig-scheduler-plugins/pkg/fluence/group/group.go similarity index 79% rename from sig-scheduler-plugins/pkg/fluence/group.go rename to sig-scheduler-plugins/pkg/fluence/group/group.go index 84641f1..b681504 100644 --- a/sig-scheduler-plugins/pkg/fluence/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group/group.go @@ -1,4 +1,4 @@ -package fluence +package group import ( "fmt" @@ -19,13 +19,18 @@ const ( // getDefaultGroupName returns a group name based on the pod namespace and name // We could do this for pods that are not labeled, and treat them as a size 1 group -func (f *Fluence) getDefaultGroupName(pod *v1.Pod) string { +func getDefaultGroupName(pod *v1.Pod) string { return fmt.Sprintf("%s-%s", pod.Namespace, pod.Name) } // getPodsGroup gets the pods group, if it exists. -func (f *Fluence) getPodsGroup(pod *v1.Pod) *fcore.PodGroupCache { - groupName := f.ensureFluenceGroup(pod) +func GetPodsGroup(pod *v1.Pod) *fcore.PodGroupCache { + groupName := EnsureFluenceGroup(pod) + return fcore.GetPodGroup(groupName) +} + +// GetGroup is a courtesy wrapper around fcore.GetPodGroup +func GetGroup(groupName string) *fcore.PodGroupCache { return fcore.GetPodGroup(groupName) } @@ -35,17 +40,17 @@ func (f *Fluence) getPodsGroup(pod *v1.Pod) *fcore.PodGroupCache { // created and no fluence annotation, we do not create the group. // Likely for fluence we'd want a cleanup function somehow too, // for now assume groups are unique by name. -func (f *Fluence) ensureFluenceGroup(pod *v1.Pod) string { +func EnsureFluenceGroup(pod *v1.Pod) string { // Get the group name and size from the fluence labels - groupName := f.getFluenceGroupName(pod) - groupSize := f.getFluenceGroupSize(pod) + groupName := getFluenceGroupName(pod) + groupSize := getFluenceGroupSize(pod) // If there isn't a group, make a single node sized group // This is so we can always treat the cases equally if groupName == "" { klog.Infof("[Fluence] Group annotation missing for pod %s", pod.Name) - groupName = f.getDefaultGroupName(pod) + groupName = getDefaultGroupName(pod) } klog.Infof("[Fluence] Group name for %s is %s", pod.Name, groupName) klog.Infof("[Fluence] Group size for %s is %d", pod.Name, groupSize) @@ -56,22 +61,22 @@ func (f *Fluence) ensureFluenceGroup(pod *v1.Pod) string { } // deleteFluenceGroup ensures the pod group is deleted, if it exists -func (f *Fluence) DeleteFluenceGroup(pod *v1.Pod) { +func DeleteFluenceGroup(pod *v1.Pod) { // Get the group name and size from the fluence labels - pg := f.getPodsGroup(pod) + pg := GetPodsGroup(pod) fcore.DeletePodGroup(pg.Name) klog.Infof("[Fluence] known groups are:\n") fcore.ListGroups() } // getFluenceGroupName looks for the group to indicate a fluence group, and returns it -func (f *Fluence) getFluenceGroupName(pod *v1.Pod) string { +func getFluenceGroupName(pod *v1.Pod) string { groupName, _ := pod.Labels[PodGroupNameLabel] return groupName } // getFluenceGroupSize gets the size of the fluence group -func (f *Fluence) getFluenceGroupSize(pod *v1.Pod) int32 { +func getFluenceGroupSize(pod *v1.Pod) int32 { size, _ := pod.Labels[PodGroupSizeLabel] // Default size of 1 if the label is not set (but name is) @@ -88,8 +93,8 @@ func (f *Fluence) getFluenceGroupSize(pod *v1.Pod) int32 { return int32(intSize) } -// getCreationTimestamp first tries the fluence group, then falls back to the initial attempt timestamp 
-func (f *Fluence) getCreationTimestamp(groupName string, podInfo *framework.QueuedPodInfo) metav1.MicroTime { +// GetCreationTimestamp first tries the fluence group, then falls back to the initial attempt timestamp +func GetCreationTimestamp(groupName string, podInfo *framework.QueuedPodInfo) metav1.MicroTime { pg := fcore.GetPodGroup(groupName) // IsZero is an indicator if this was actually set diff --git a/src/Makefile b/src/Makefile index 344bde1..3392add 100644 --- a/src/Makefile +++ b/src/Makefile @@ -36,3 +36,4 @@ protoc: $(LOCALBIN) .PHONY: proto proto: protoc PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluence/fluxcli-grpc/fluxcli.proto + PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluence/service-grpc/service.proto \ No newline at end of file diff --git a/src/fluence/cmd/main.go b/src/fluence/cmd/main.go index c064ce8..3fb6a06 100644 --- a/src/fluence/cmd/main.go +++ b/src/fluence/cmd/main.go @@ -1,30 +1,44 @@ package main import ( - "fmt" "flag" + "fmt" "net" - "google.golang.org/grpc/keepalive" - "google.golang.org/grpc" + "strings" "time" + "google.golang.org/grpc" + "google.golang.org/grpc/keepalive" + pb "github.com/flux-framework/flux-k8s/flux-plugin/fluence/fluxcli-grpc" "github.com/flux-framework/flux-k8s/flux-plugin/fluence/fluxion" + "github.com/flux-framework/flux-k8s/flux-plugin/fluence/service" + svcPb "github.com/flux-framework/flux-k8s/flux-plugin/fluence/service-grpc" ) - const ( - port = ":4242" + defaultPort = ":4242" + enableExternalService = false ) var responsechan chan string -func main () { +func main() { fmt.Println("This is the fluxion grpc server") policy := flag.String("policy", "", "Match policy") label := flag.String("label", "", "Label name for fluence dedicated nodes") + grpcPort := flag.String("port", defaultPort, "Port for grpc service") + enableServicePlugin := flag.Bool("external-service", enableExternalService, "Flag to enable the external service (defaults to false)") flag.Parse() + + // Ensure our port starts with : + port := *grpcPort + if !strings.HasPrefix(":", port) { + port = fmt.Sprintf(":%s", port) + } + + // Fluxion GRPC flux := fluxion.Fluxion{} flux.InitFluxion(policy, label) @@ -36,14 +50,27 @@ func main () { responsechan = make(chan string) s := grpc.NewServer( grpc.KeepaliveParams(keepalive.ServerParameters{ - MaxConnectionIdle: 5 * time.Minute, + MaxConnectionIdle: 5 * time.Minute, }), ) - pb.RegisterFluxcliServiceServer(s, &flux /*&server{flux: flux}*/) + pb.RegisterFluxcliServiceServer(s, &flux) + + // External plugin (Kubectl) GRPC + // This will eventually be an external GRPC module that can + // be shared by fluence (flux-k8s) and fluence-kubectl + // We give it a handle to Flux to get the state of groups + // and job Ids. 
The direct interaction with Fluxion + // happens through the other service handle + if *enableServicePlugin { + plugin := service.ExternalService{} + plugin.Init() + svcPb.RegisterExternalPluginServiceServer(s, &plugin) + } + fmt.Printf("[GRPCServer] gRPC Listening on %s\n", lis.Addr().String()) if err := s.Serve(lis); err != nil { fmt.Printf("[GRPCServer] failed to serve: %v\n", err) } - + fmt.Printf("[GRPCServer] Exiting\n") -} \ No newline at end of file +} diff --git a/src/fluence/cmd/main.go.bk b/src/fluence/cmd/main.go.bk deleted file mode 100644 index 5e66d14..0000000 --- a/src/fluence/cmd/main.go.bk +++ /dev/null @@ -1,15 +0,0 @@ -package main - -import ( - "fmt" - "flag" - "github.com/flux-framework/flux-k8s/flux-plugin/kubeflux/fluxion" -) - -func main () { - policy := flag.String("policy", "", "Match policy") - flag.Parse() - fmt.Println("Policy ", policy) - fc := fluxion.Fluxion{Policy: *policy} - fc.InitFluxion() -} \ No newline at end of file diff --git a/src/fluence/defaults/defaults.go b/src/fluence/defaults/defaults.go new file mode 100644 index 0000000..f4fc8f2 --- /dev/null +++ b/src/fluence/defaults/defaults.go @@ -0,0 +1,5 @@ +package defaults + +var ( + KubernetesJsonGraphFormat = "/home/data/jgf/kubecluster.json" +) diff --git a/src/fluence/fluxcli-grpc/fluxcli.proto b/src/fluence/fluxcli-grpc/fluxcli.proto index f85b558..1446041 100644 --- a/src/fluence/fluxcli-grpc/fluxcli.proto +++ b/src/fluence/fluxcli-grpc/fluxcli.proto @@ -3,8 +3,7 @@ option go_package = "grpc/fluxcli"; package fluxcli; - -// Service definition +// Service definition for Fluxclient service FluxcliService { // Sends a Match command rpc Match(MatchRequest) returns (MatchResponse) {} @@ -73,4 +72,4 @@ message JGFRequest { // The JGF response message message JGFResponse { string jgf = 1; -} \ No newline at end of file +} diff --git a/src/fluence/fluxion/fluxion.go b/src/fluence/fluxion/fluxion.go index 18d6735..f29ac62 100644 --- a/src/fluence/fluxion/fluxion.go +++ b/src/fluence/fluxion/fluxion.go @@ -3,6 +3,7 @@ package fluxion import ( "os" + "github.com/flux-framework/flux-k8s/flux-plugin/fluence/defaults" pb "github.com/flux-framework/flux-k8s/flux-plugin/fluence/fluxcli-grpc" "github.com/flux-framework/flux-k8s/flux-plugin/fluence/jobspec" "github.com/flux-framework/flux-k8s/flux-plugin/fluence/utils" @@ -22,14 +23,13 @@ type Fluxion struct { func (f *Fluxion) InitFluxion(policy *string, label *string) { f.cli = fluxcli.NewReapiClient() - klog.Infof("[Fluence] Created flux resource client ", f.cli) - filename := "/home/data/jgf/kubecluster.json" - err := utils.CreateJGF(filename, label) + klog.Infof("[Fluence] Created flux resource client %s", f.cli) + err := utils.CreateJGF(defaults.KubernetesJsonGraphFormat, label) if err != nil { return } - jgf, err := os.ReadFile(filename) + jgf, err := os.ReadFile(defaults.KubernetesJsonGraphFormat) if err != nil { klog.Error("Error reading JGF") return @@ -38,7 +38,7 @@ func (f *Fluxion) InitFluxion(policy *string, label *string) { p := "{}" if *policy != "" { p = string("{\"matcher_policy\": \"" + *policy + "\"}") - klog.Infof("[Fluence] match policy: ", p) + klog.Infof("[Fluence] match policy: %s", p) } f.cli.InitContext(string(jgf), p) diff --git a/src/fluence/service-grpc/service.pb.go b/src/fluence/service-grpc/service.pb.go new file mode 100644 index 0000000..eca0e69 --- /dev/null +++ b/src/fluence/service-grpc/service.pb.go @@ -0,0 +1,351 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// versions: +// protoc-gen-go v1.28.1 +// protoc v3.20.3 +// source: fluence/service-grpc/service.proto + +package service + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// GroupRequest for a group +type GroupRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Group string `protobuf:"bytes,1,opt,name=group,proto3" json:"group,omitempty"` +} + +func (x *GroupRequest) Reset() { + *x = GroupRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_fluence_service_grpc_service_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GroupRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GroupRequest) ProtoMessage() {} + +func (x *GroupRequest) ProtoReflect() protoreflect.Message { + mi := &file_fluence_service_grpc_service_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GroupRequest.ProtoReflect.Descriptor instead. +func (*GroupRequest) Descriptor() ([]byte, []int) { + return file_fluence_service_grpc_service_proto_rawDescGZIP(), []int{0} +} + +func (x *GroupRequest) GetGroup() string { + if x != nil { + return x.Group + } + return "" +} + +// GroupResponse +type GroupResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Size int64 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"` +} + +func (x *GroupResponse) Reset() { + *x = GroupResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_fluence_service_grpc_service_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GroupResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GroupResponse) ProtoMessage() {} + +func (x *GroupResponse) ProtoReflect() protoreflect.Message { + mi := &file_fluence_service_grpc_service_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GroupResponse.ProtoReflect.Descriptor instead. 
+func (*GroupResponse) Descriptor() ([]byte, []int) { + return file_fluence_service_grpc_service_proto_rawDescGZIP(), []int{1} +} + +func (x *GroupResponse) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *GroupResponse) GetSize() int64 { + if x != nil { + return x.Size + } + return 0 +} + +type ResourceRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *ResourceRequest) Reset() { + *x = ResourceRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_fluence_service_grpc_service_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ResourceRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ResourceRequest) ProtoMessage() {} + +func (x *ResourceRequest) ProtoReflect() protoreflect.Message { + mi := &file_fluence_service_grpc_service_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ResourceRequest.ProtoReflect.Descriptor instead. +func (*ResourceRequest) Descriptor() ([]byte, []int) { + return file_fluence_service_grpc_service_proto_rawDescGZIP(), []int{2} +} + +type ResourceResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Graph string `protobuf:"bytes,1,opt,name=graph,proto3" json:"graph,omitempty"` +} + +func (x *ResourceResponse) Reset() { + *x = ResourceResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_fluence_service_grpc_service_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ResourceResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ResourceResponse) ProtoMessage() {} + +func (x *ResourceResponse) ProtoReflect() protoreflect.Message { + mi := &file_fluence_service_grpc_service_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ResourceResponse.ProtoReflect.Descriptor instead. 
+func (*ResourceResponse) Descriptor() ([]byte, []int) { + return file_fluence_service_grpc_service_proto_rawDescGZIP(), []int{3} +} + +func (x *ResourceResponse) GetGraph() string { + if x != nil { + return x.Graph + } + return "" +} + +var File_fluence_service_grpc_service_proto protoreflect.FileDescriptor + +var file_fluence_service_grpc_service_proto_rawDesc = []byte{ + 0x0a, 0x22, 0x66, 0x6c, 0x75, 0x65, 0x6e, 0x63, 0x65, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x2d, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x07, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x22, 0x24, 0x0a, + 0x0c, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x14, 0x0a, + 0x05, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x67, 0x72, + 0x6f, 0x75, 0x70, 0x22, 0x37, 0x0a, 0x0d, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x22, 0x11, 0x0a, 0x0f, + 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, + 0x28, 0x0a, 0x10, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x67, 0x72, 0x61, 0x70, 0x68, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x05, 0x67, 0x72, 0x61, 0x70, 0x68, 0x32, 0xda, 0x01, 0x0a, 0x15, 0x45, 0x78, + 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x53, 0x65, 0x72, 0x76, + 0x69, 0x63, 0x65, 0x12, 0x45, 0x0a, 0x0c, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x73, 0x12, 0x18, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x52, 0x65, + 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, + 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3d, 0x0a, 0x0a, 0x4c, 0x69, + 0x73, 0x74, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x73, 0x12, 0x15, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, + 0x63, 0x65, 0x2e, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x16, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x08, 0x47, 0x65, 0x74, + 0x47, 0x72, 0x6f, 0x75, 0x70, 0x12, 0x15, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, + 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x73, + 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x0e, 0x5a, 0x0c, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x73, + 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_fluence_service_grpc_service_proto_rawDescOnce sync.Once + file_fluence_service_grpc_service_proto_rawDescData = file_fluence_service_grpc_service_proto_rawDesc +) + +func file_fluence_service_grpc_service_proto_rawDescGZIP() []byte { + file_fluence_service_grpc_service_proto_rawDescOnce.Do(func() { + file_fluence_service_grpc_service_proto_rawDescData = 
protoimpl.X.CompressGZIP(file_fluence_service_grpc_service_proto_rawDescData) + }) + return file_fluence_service_grpc_service_proto_rawDescData +} + +var file_fluence_service_grpc_service_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_fluence_service_grpc_service_proto_goTypes = []interface{}{ + (*GroupRequest)(nil), // 0: service.GroupRequest + (*GroupResponse)(nil), // 1: service.GroupResponse + (*ResourceRequest)(nil), // 2: service.ResourceRequest + (*ResourceResponse)(nil), // 3: service.ResourceResponse +} +var file_fluence_service_grpc_service_proto_depIdxs = []int32{ + 2, // 0: service.ExternalPluginService.GetResources:input_type -> service.ResourceRequest + 0, // 1: service.ExternalPluginService.ListGroups:input_type -> service.GroupRequest + 0, // 2: service.ExternalPluginService.GetGroup:input_type -> service.GroupRequest + 3, // 3: service.ExternalPluginService.GetResources:output_type -> service.ResourceResponse + 1, // 4: service.ExternalPluginService.ListGroups:output_type -> service.GroupResponse + 1, // 5: service.ExternalPluginService.GetGroup:output_type -> service.GroupResponse + 3, // [3:6] is the sub-list for method output_type + 0, // [0:3] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_fluence_service_grpc_service_proto_init() } +func file_fluence_service_grpc_service_proto_init() { + if File_fluence_service_grpc_service_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_fluence_service_grpc_service_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GroupRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_fluence_service_grpc_service_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GroupResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_fluence_service_grpc_service_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ResourceRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_fluence_service_grpc_service_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ResourceResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_fluence_service_grpc_service_proto_rawDesc, + NumEnums: 0, + NumMessages: 4, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_fluence_service_grpc_service_proto_goTypes, + DependencyIndexes: file_fluence_service_grpc_service_proto_depIdxs, + MessageInfos: file_fluence_service_grpc_service_proto_msgTypes, + }.Build() + File_fluence_service_grpc_service_proto = out.File + file_fluence_service_grpc_service_proto_rawDesc = nil + file_fluence_service_grpc_service_proto_goTypes = nil + file_fluence_service_grpc_service_proto_depIdxs = nil +} diff --git a/src/fluence/service-grpc/service.proto b/src/fluence/service-grpc/service.proto new file mode 100644 index 0000000..6240314 --- 
/dev/null +++ b/src/fluence/service-grpc/service.proto @@ -0,0 +1,34 @@ +syntax = "proto3"; +option go_package = "grpc/service"; + +package service; + + +// Service definition for an external plugin like kubectl +service ExternalPluginService { + + // This is supported via a shared file in the container + rpc GetResources(ResourceRequest) returns (ResourceResponse) {} + + // Note we currently cannot support getting group metadata, need to add handle to get info, etc. + rpc ListGroups(GroupRequest) returns (GroupResponse) {} + rpc GetGroup(GroupRequest) returns (GroupResponse) {} +} + +// GroupRequest for a group +message GroupRequest { + string group = 1; +} + +// GroupResponse +message GroupResponse { + string name = 1; + int64 size = 2; +} + +message ResourceRequest {} +message ResourceResponse { + string graph = 1; +} + + diff --git a/src/fluence/service-grpc/service_grpc.pb.go b/src/fluence/service-grpc/service_grpc.pb.go new file mode 100644 index 0000000..c15f8f3 --- /dev/null +++ b/src/fluence/service-grpc/service_grpc.pb.go @@ -0,0 +1,181 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 +// source: fluence/service-grpc/service.proto + +package service + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +// ExternalPluginServiceClient is the client API for ExternalPluginService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type ExternalPluginServiceClient interface { + // This is supported via a shared file in the container + GetResources(ctx context.Context, in *ResourceRequest, opts ...grpc.CallOption) (*ResourceResponse, error) + // Note we currently cannot support getting group metadata, need to add handle to get info, etc. + ListGroups(ctx context.Context, in *GroupRequest, opts ...grpc.CallOption) (*GroupResponse, error) + GetGroup(ctx context.Context, in *GroupRequest, opts ...grpc.CallOption) (*GroupResponse, error) +} + +type externalPluginServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewExternalPluginServiceClient(cc grpc.ClientConnInterface) ExternalPluginServiceClient { + return &externalPluginServiceClient{cc} +} + +func (c *externalPluginServiceClient) GetResources(ctx context.Context, in *ResourceRequest, opts ...grpc.CallOption) (*ResourceResponse, error) { + out := new(ResourceResponse) + err := c.cc.Invoke(ctx, "/service.ExternalPluginService/GetResources", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *externalPluginServiceClient) ListGroups(ctx context.Context, in *GroupRequest, opts ...grpc.CallOption) (*GroupResponse, error) { + out := new(GroupResponse) + err := c.cc.Invoke(ctx, "/service.ExternalPluginService/ListGroups", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *externalPluginServiceClient) GetGroup(ctx context.Context, in *GroupRequest, opts ...grpc.CallOption) (*GroupResponse, error) { + out := new(GroupResponse) + err := c.cc.Invoke(ctx, "/service.ExternalPluginService/GetGroup", in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +// ExternalPluginServiceServer is the server API for ExternalPluginService service. +// All implementations must embed UnimplementedExternalPluginServiceServer +// for forward compatibility +type ExternalPluginServiceServer interface { + // This is supported via a shared file in the container + GetResources(context.Context, *ResourceRequest) (*ResourceResponse, error) + // Note we currently cannot support getting group metadata, need to add handle to get info, etc. + ListGroups(context.Context, *GroupRequest) (*GroupResponse, error) + GetGroup(context.Context, *GroupRequest) (*GroupResponse, error) + mustEmbedUnimplementedExternalPluginServiceServer() +} + +// UnimplementedExternalPluginServiceServer must be embedded to have forward compatible implementations. +type UnimplementedExternalPluginServiceServer struct { +} + +func (UnimplementedExternalPluginServiceServer) GetResources(context.Context, *ResourceRequest) (*ResourceResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetResources not implemented") +} +func (UnimplementedExternalPluginServiceServer) ListGroups(context.Context, *GroupRequest) (*GroupResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ListGroups not implemented") +} +func (UnimplementedExternalPluginServiceServer) GetGroup(context.Context, *GroupRequest) (*GroupResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetGroup not implemented") +} +func (UnimplementedExternalPluginServiceServer) mustEmbedUnimplementedExternalPluginServiceServer() {} + +// UnsafeExternalPluginServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to ExternalPluginServiceServer will +// result in compilation errors. 
+type UnsafeExternalPluginServiceServer interface { + mustEmbedUnimplementedExternalPluginServiceServer() +} + +func RegisterExternalPluginServiceServer(s grpc.ServiceRegistrar, srv ExternalPluginServiceServer) { + s.RegisterService(&ExternalPluginService_ServiceDesc, srv) +} + +func _ExternalPluginService_GetResources_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ResourceRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ExternalPluginServiceServer).GetResources(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/service.ExternalPluginService/GetResources", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ExternalPluginServiceServer).GetResources(ctx, req.(*ResourceRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _ExternalPluginService_ListGroups_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GroupRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ExternalPluginServiceServer).ListGroups(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/service.ExternalPluginService/ListGroups", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ExternalPluginServiceServer).ListGroups(ctx, req.(*GroupRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _ExternalPluginService_GetGroup_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GroupRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ExternalPluginServiceServer).GetGroup(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/service.ExternalPluginService/GetGroup", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ExternalPluginServiceServer).GetGroup(ctx, req.(*GroupRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// ExternalPluginService_ServiceDesc is the grpc.ServiceDesc for ExternalPluginService service. 
+// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var ExternalPluginService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "service.ExternalPluginService", + HandlerType: (*ExternalPluginServiceServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "GetResources", + Handler: _ExternalPluginService_GetResources_Handler, + }, + { + MethodName: "ListGroups", + Handler: _ExternalPluginService_ListGroups_Handler, + }, + { + MethodName: "GetGroup", + Handler: _ExternalPluginService_GetGroup_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "fluence/service-grpc/service.proto", +} diff --git a/src/fluence/service/service.go b/src/fluence/service/service.go new file mode 100644 index 0000000..ad61c1a --- /dev/null +++ b/src/fluence/service/service.go @@ -0,0 +1,61 @@ +package service + +import ( + "os" + + "github.com/flux-framework/flux-k8s/flux-plugin/fluence/defaults" + pb "github.com/flux-framework/flux-k8s/flux-plugin/fluence/service-grpc" + + "k8s.io/klog/v2" + + "context" +) + +type ExternalService struct { + pb.UnimplementedExternalPluginServiceServer +} + +// Init is a helper function for any startup stuff, for which now we have none :) +func (f *ExternalService) Init() { + klog.Infof("[Fluence] Created external service.") +} + +// GetGroup gets and returns the group info +// TODO no good way to look up group - we would need to ask Fluxion directly OR put the grpc +// service alongside the scheduler plugin, which seems like a bad design +func (s *ExternalService) GetGroup(ctx context.Context, in *pb.GroupRequest) (*pb.GroupResponse, error) { + klog.Infof("[Fluence] Calling get group endpoint! %v\n", in) + + // Prepare an empty match response (that can still be serialized) + emptyResponse := &pb.GroupResponse{} + return emptyResponse, nil +} + +// List group returns existing groups +func (s *ExternalService) ListGroups(ctx context.Context, in *pb.GroupRequest) (*pb.GroupResponse, error) { + + emptyResponse := &pb.GroupResponse{} + + // Prepare an empty match response (that can still be serialized) + klog.Infof("[Fluence] Calling list groups endpoint! %v\n", in) + + return emptyResponse, nil +} + +// GetResources gets the current Kubernetes Json Graph Format JGF +// This should be created on init of the scheduler +func (s *ExternalService) GetResources(ctx context.Context, in *pb.ResourceRequest) (*pb.ResourceResponse, error) { + + emptyResponse := &pb.ResourceResponse{} + + // Prepare an empty match response (that can still be serialized) + klog.Infof("[Fluence] Calling get resources endpoint! %v\n", in) + + jgf, err := os.ReadFile(defaults.KubernetesJsonGraphFormat) + if err != nil { + klog.Error("Error reading JGF") + return emptyResponse, err + } + emptyResponse.Graph = string(jgf) + return emptyResponse, nil +} From 673e34d002e37fcd97603574b02be15f1b4a8a46 Mon Sep 17 00:00:00 2001 From: vsoch Date: Sat, 17 Feb 2024 00:34:02 -0700 Subject: [PATCH 09/28] feat: add controller base image to build from here Problem: we want to be able to persist PodGroup if upstream removes it Solution: build our own controller image, also allowing us to tweak it to enhance fluence. 
This commit also renames the helm install to be "fluence" so it is easier for the developer workflow Signed-off-by: vsoch --- .github/test.sh | 0 Makefile | 2 + README.md | 173 +++++--------- examples/kind-config.yaml | 26 +++ examples/kube_setup/taint_workers.sh | 0 examples/pi/clean_pods.sh | 0 examples/pi/demo_failed_pod_cancellation.sh | 0 examples/pi/init_kind_cluster.sh | 0 .../run_experiments/process_job_template.py | 0 examples/run_experiments/run_experiments.py | 0 .../charts/as-a-second-scheduler/values.yaml | 1 + .../pkg/controllers/podgroup_controller.go | 220 ++++++++++++++++++ 12 files changed, 306 insertions(+), 116 deletions(-) mode change 100755 => 100644 .github/test.sh create mode 100644 examples/kind-config.yaml mode change 100755 => 100644 examples/kube_setup/taint_workers.sh mode change 100755 => 100644 examples/pi/clean_pods.sh mode change 100755 => 100644 examples/pi/demo_failed_pod_cancellation.sh mode change 100755 => 100644 examples/pi/init_kind_cluster.sh mode change 100755 => 100644 examples/run_experiments/process_job_template.py mode change 100755 => 100644 examples/run_experiments/run_experiments.py create mode 100644 sig-scheduler-plugins/pkg/controllers/podgroup_controller.go diff --git a/.github/test.sh b/.github/test.sh old mode 100755 new mode 100644 diff --git a/Makefile b/Makefile index 1356160..97efa75 100644 --- a/Makefile +++ b/Makefile @@ -26,8 +26,10 @@ update: clone prepare: clone # These are entirely new directory structures rm -rf $(CLONE_UPSTREAM)/pkg/fluence + rm -rf $(CLONE_UPSTREAM)/pkg/controllers/podgroup_controller.go rm -rf $(CLONE_UPSTREAM)/manifests/fluence cp -R sig-scheduler-plugins/pkg/fluence $(CLONE_UPSTREAM)/pkg/fluence + cp -R sig-scheduler-plugins/pkg/controllers/* $(CLONE_UPSTREAM)/pkg/controllers/ # This is the one exception not from sig-scheduler-plugins because it is needed in both spots cp -R src/fluence/fluxcli-grpc $(CLONE_UPSTREAM)/pkg/fluence/fluxcli-grpc # These are files with subtle changes to add fluence diff --git a/README.md b/README.md index 0433799..f0d67cd 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ ![docs/images/fluence.png](docs/images/fluence.png) -Fluence enables HPC-grade pod scheduling in Kubernetes via the [Kubernetes Scheduling Framework](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/). Fluence uses the directed-graph based [Fluxion scheduler](https://github.com/flux-framework/flux-sched) to map pods or [podgroups](https://github.com/kubernetes-sigs/scheduler-plugins/tree/master/pkg/coscheduling) to nodes. Fluence supports all the Fluxion scheduling algorithms (e.g., `hi`, `low`, `hinode`, etc.). Note that Fluence does not currently support use in conjunction with the kube-scheduler. Pods must all be scheduled by Fluence. +Fluence enables HPC-grade pod scheduling in Kubernetes via the [Kubernetes Scheduling Framework](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/). Fluence uses the directed-graph based [Fluxion scheduler](https://github.com/flux-framework/flux-sched) to map pods or [podgroups](https://github.com/kubernetes-sigs/scheduler-plugins/tree/master/pkg/coscheduling) to nodes. Fluence supports all the Fluxion scheduling algorithms (e.g., `hi`, `low`, `hinode`, etc.). + +**Important** Fluence does not currently support use in conjunction with the kube-scheduler. Pods must all be scheduled by Fluence, and *you should not use both schedulers in the same cluster*. 
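In practice, "scheduled by Fluence" means every pod (or the pod template of a Job or Deployment) points at the fluence scheduler and, optionally, declares its group through labels; pods without a group label are treated as a size-1 group. A minimal sketch in Go is below; the scheduler name and the literal label keys are assumptions for illustration (the authoritative constants are `PodGroupNameLabel` and `PodGroupSizeLabel` in `sig-scheduler-plugins/pkg/fluence/group/group.go`):

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/yaml"
)

func main() {
	pod := &corev1.Pod{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "Pod"},
		ObjectMeta: metav1.ObjectMeta{
			Name: "fluence-demo",
			Labels: map[string]string{
				// Illustrative keys: see PodGroupNameLabel / PodGroupSizeLabel
				// in sig-scheduler-plugins/pkg/fluence/group/group.go for the real ones
				"fluence.group-name": "fluence-demo",
				"fluence.group-size": "4",
			},
		},
		Spec: corev1.PodSpec{
			// Assumed scheduler name; check scheduler.name in the chart values
			SchedulerName: "fluence",
			Containers: []corev1.Container{
				{Name: "app", Image: "busybox", Command: []string{"sleep", "60"}},
			},
		},
	}
	// Print the manifest so it can be piped to kubectl apply -f -
	out, _ := yaml.Marshal(pod)
	fmt.Println(string(out))
}
```
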
## Getting started @@ -66,7 +68,8 @@ cd upstream/manifests/install/charts helm install \ --set scheduler.image=ghcr.io/flux-framework/fluence:latest \ --set scheduler.sidecarimage=ghcr.io/flux-framework/fluence-sidecar \ - schedscheduler-plugins as-a-second-scheduler/ + --set controller.image=ghcr.io/flux-framework/fluence-controller \ + fluence as-a-second-scheduler/ ``` And that's it! See the [testing install](#testing-install) section for a basic example @@ -85,17 +88,18 @@ To build and test Fluence, you will need: There are two images we will be building: - the scheduler sidecar: built from the repository here - - the scheduler: built from [this branch of scheduler-plugins](https://github.com/openshift-psap/scheduler-plugins/blob/fluence/build/scheduler/Dockerfile) + - the scheduler: built (and modified) from [this branch of scheduler-plugins](https://github.com/openshift-psap/scheduler-plugins/blob/fluence/build/scheduler/Dockerfile) + - the controller: same as the scheduler -#### All at once (Sidecar + Scheduler) +#### Build All -**recommended** +**This builds the scheduler, sidecar to the scheduler, and controller** This will run the full builds for all containers in one step, which includes: 1. Building the fluence sidecar from source code in [src](src) 2. Cloning the upstream kubernetes-sigs/plugin-schedulers respository to ./upstream -3. Building the scheduler container +3. Building the scheduler and controller containers From the root here: @@ -106,115 +110,18 @@ make or customize the naming of your registry or local images: ```bash -make REGISTRY=vanessa SCHEDULER_IMAGE=fluence SIDECAR_IMAGE=fluence-sidecar -``` - -As an alternative, you can do each of the steps separately or manually (detailed below). - -
- - Manual Build Instructions - -#### Build Sidecar - -To build the plugin containers, we will basically be running `make` from the [src](src) directory. We have wrapped that for you -in the Makefile: - -```bash -make build-sidecar -``` - -To build for a custom registry (e.g., "vanessa' on Docker Hub): - -```bash -make build-sidecar REGISTRY=vanessa -``` - -And specify the sidecar image name too: - -```bash -make build-sidecar REGISTRY=vanessa SIDECAR_IMAGE=another-sidecar -``` - -The equivalent manual command is: - -```bash -cd src -make -``` - -Using either of the approaches above, this will create the scheduler plugin main container, which can be tagged and pushed to the preferred registry. As an example, -here we push to the result of the build above: - -```bash -docker push docker.io/vanessa/fluence-sidecar:latest -``` - -#### Build Scheduler - -Note that you can run this entire process like: - -```bash -make prepare -make build -``` - -Or customize the name of the scheduler image: - -```bash -make prepare -make build REGISTRY=vanessa -``` - -For a custom scheduler or controller image (we just need the scheduler): - -```bash -make build REGISTRY=vanessa CONTROLLER_IMAGE=fluence-controller SCHEDULER_IMAGE=fluence -``` - -To walk through it manually, first, clone the upstream scheduler-plugins repository: - -```bash -git clone https://github.com/kubernetes-sigs/scheduler-plugins ./upstream -``` - -We need to add our fluence package to the scheduler plugins to build. You can do that manully as follows: - -```bash -# These are entirely new directory structures -cp -R sig-scheduler-plugins/pkg/fluence ./upstream/pkg/fluence -cp -R sig-scheduler-plugins/manifests/fluence ./upstream/manifests/fluence - -# These are files with subtle changes to add fluence -cp sig-scheduler-plugins/cmd/scheduler/main.go ./upstream/cmd/scheduler/main.go -cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml ./upstream/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml -cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml ./upstream/manifests/install/charts/as-a-second-scheduler/values.yaml -``` - -Then change directory to the scheduler plugins repository. - -```bash -cd ./upstream +make REGISTRY=vanessa SCHEDULER_IMAGE=fluence SIDECAR_IMAGE=fluence-sidecar CONTROLLER_IMAGE=fluence-controller ``` -And build! You'll most likely want to set a custom registry and image name again: +As an alternative, you can look at the Makefile to do each of the steps separately. -```bash -# This will build to localhost -make local-image - -# this will build to docker.io/vanessa/fluence -make local-image REGISTRY=vanessa CONTROLLER_IMAGE=fluence -``` - -
- -**Important** the make command above produces _two images_ and you want to use the first that is mentioned in the output (not the second, which is a controller). Whatever build approach you use, you'll want to push to your registry for later discovery! ```bash docker push docker.io/vanessa/fluence +docker push docker.io/vanessa/fluence-sidecar +docker push docker.io/vanessa/fluence-controller ``` ### Prepare Cluster @@ -268,7 +175,7 @@ scheduler: controller: name: scheduler-plugins-controller - image: registry.k8s.io/scheduler-plugins/controller:v0.27.8 + image: ghcr.io/flux-framework/fluence-controller:latest replicaCount: 1 pullPolicy: IfNotPresent @@ -303,7 +210,8 @@ cd upstream/manifests/install/charts helm install \ --set scheduler.image=vanessa/fluence:latest \ --set scheduler.sidecarimage=vanessa/fluence-sidecar \ - schedscheduler-plugins as-a-second-scheduler/ + --set controller.image=vanessa/fluence-controller \ + fluence as-a-second-scheduler/ ``` If you load your images into your testing environment and don't need to pull, you can change the pull policy too: @@ -312,14 +220,15 @@ If you load your images into your testing environment and don't need to pull, yo helm install \ --set scheduler.image=vanessa/fluence:latest \ --set scheduler.sidecarimage=vanessa/fluence-sidecar \ + --set controller.image=vanessa/fluence-controller \ --set scheduler.sidecarPullPolicy=IfNotPresent \ - schedscheduler-plugins as-a-second-scheduler/ + fluence as-a-second-scheduler/ ``` If you need to uninstall (e.g., to redo something): ```bash -helm uninstall schedscheduler-plugins +helm uninstall fluence ``` Next you can move down to testing the install. @@ -519,14 +428,21 @@ The easiest thing to do is to build the containers in some container namespace t make build REGISTRY=ghcr.io/vsoch ``` +If needed, create a "multi node" kind cluster: + +```bash +kind create cluster --config ./examples/kind-config.yaml +``` + And then install with your custom images: ```bash cd ./upstream/manifests/install/charts helm install \ --set scheduler.image=ghcr.io/vsoch/fluence:latest \ + --set controller.image=ghcr.io/vsoch/fluence-controller:latest \ --set scheduler.sidecarimage=ghcr.io/vsoch/fluence-sidecar:latest \ - schedscheduler-plugins as-a-second-scheduler/ + fluence as-a-second-scheduler/ ``` And then apply what you need to test, and look at logs! @@ -540,8 +456,9 @@ Note that if you want to enable extra endpoints for the fluence kubectl plugin a helm install \ --set scheduler.image=ghcr.io/vsoch/fluence:latest \ --set scheduler.enableExternalService=true \ + --set controller.image=vanessa/fluence-controller \ --set scheduler.sidecarimage=ghcr.io/vsoch/fluence-sidecar:latest \ - schedscheduler-plugins as-a-second-scheduler/ + fluence as-a-second-scheduler/ ``` For this setup if you are developing locally with kind, you will need to enable the ingress. Here is `kind-config.yaml` @@ -569,6 +486,30 @@ And to create: kind create cluster --config ./kind-config.yaml ``` +#### Vanessa Thinking + +> Updated February 15, 2024 + +What I think might be happening (and not always, sometimes) + +- New pod group, no node list +- Fluence assigns nodes +- Nodes get assigned to pods 1:1 +- POD group is deleted +- Some pod is sent back to queue (kubelet rejects, etc) +- POD group does not exist and is recreated, no node list +- Fluence asks again, but still has the first job. Not enough resources, asks forever. 
+ +The above would not happen with the persistent pod group (if it wasn't cleaned up until the deletion of the job) and wouldn't happen if there are just enough resources to account for the overlap. + +- Does Fluence allocate resources for itself? +- It would be nice to be able to inspect the state of Fluence. +- At some point we want to be using the TBA fluxion-go instead of the one off branch we currently have (but we don't need to be blocked for that) +- We should (I think) restore pod group (it's in the controller here) and have our own container built. That way we have total control over the custom resource, and we don't risk it going away. + - As a part of that, we can add add a mutating webhook that emulates what we are doing in fluence now to find the label, but instead we will create the CRD to hold state instead of trying to hold in the operator. +- It could then also be investigated that we can more flexibly change the size of the group, within some min/max size (also determined by labels?) to help with scheduling. +- Note that kueue has added a Pod Group object, so probably addresses the static case here. + #### Components - [FluxStateData](sig-scheduler-plugins/pkg/fluence/core/core.go): is given to the [framework.CycleState](https://github.com/kubernetes/kubernetes/blob/242b41b36a20032f99e8a059ca0a5d764105217b/pkg/scheduler/framework/cycle_state.go#L48) and serves as a vehicle to store a cache of node name assignment. @@ -583,14 +524,14 @@ The install commands are shown above, but often you want to uninstall! ```bash helm list NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION -schedscheduler-plugins default 1 2024-01-08 12:04:58.558612156 -0700 MST deployed scheduler-plugins-0.27.80.27.8 +fluence default 1 2024-01-08 12:04:58.558612156 -0700 MST deployed scheduler-plugins-0.27.80.27.8 ``` And then uninstall: ```bash -$ helm uninstall schedscheduler-plugins -release "schedscheduler-plugins" uninstalled +$ helm uninstall fluence +release "fluence" uninstalled ``` diff --git a/examples/kind-config.yaml b/examples/kind-config.yaml new file mode 100644 index 0000000..2971483 --- /dev/null +++ b/examples/kind-config.yaml @@ -0,0 +1,26 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane + kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + extraPortMappings: + - containerPort: 8080 + hostPort: 8080 + protocol: TCP + - containerPort: 4242 + hostPort: 4242 + protocol: TCP + - containerPort: 4243 + hostPort: 4243 + protocol: TCP +- role: worker +- role: worker +- role: worker +- role: worker +- role: worker +- role: worker \ No newline at end of file diff --git a/examples/kube_setup/taint_workers.sh b/examples/kube_setup/taint_workers.sh old mode 100755 new mode 100644 diff --git a/examples/pi/clean_pods.sh b/examples/pi/clean_pods.sh old mode 100755 new mode 100644 diff --git a/examples/pi/demo_failed_pod_cancellation.sh b/examples/pi/demo_failed_pod_cancellation.sh old mode 100755 new mode 100644 diff --git a/examples/pi/init_kind_cluster.sh b/examples/pi/init_kind_cluster.sh old mode 100755 new mode 100644 diff --git a/examples/run_experiments/process_job_template.py b/examples/run_experiments/process_job_template.py old mode 100755 new mode 100644 diff --git a/examples/run_experiments/run_experiments.py b/examples/run_experiments/run_experiments.py old mode 100755 new mode 100644 diff --git 
a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml index 2a35a3a..be1e797 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml @@ -22,6 +22,7 @@ scheduler: controller: name: scheduler-plugins-controller image: registry.k8s.io/scheduler-plugins/controller:v0.27.8 + image: ghcr.io/flux-framework/fluence-controller:latest replicaCount: 1 pullPolicy: IfNotPresent diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go new file mode 100644 index 0000000..02eb4e4 --- /dev/null +++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go @@ -0,0 +1,220 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "fmt" + "sort" + "strings" + "time" + + "github.com/go-logr/logr" + v1 "k8s.io/api/core/v1" + apierrs "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + schedv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" + "sigs.k8s.io/scheduler-plugins/pkg/util" +) + +// PodGroupReconciler reconciles a PodGroup object +type PodGroupReconciler struct { + log logr.Logger + recorder record.EventRecorder + + client.Client + Scheme *runtime.Scheme + Workers int +} + +// +kubebuilder:rbac:groups=scheduling.x-k8s.io,resources=podgroups,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=scheduling.x-k8s.io,resources=podgroups/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=scheduling.x-k8s.io,resources=podgroups/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the PodGroup object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. 
+// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile +func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := log.FromContext(ctx) + log.Info("reconciling flux-framework/fluence-controller") + pg := &schedv1alpha1.PodGroup{} + if err := r.Get(ctx, req.NamespacedName, pg); err != nil { + if apierrs.IsNotFound(err) { + log.V(5).Info("Pod group has been deleted") + return ctrl.Result{}, nil + } + log.V(3).Error(err, "Unable to retrieve pod group") + return ctrl.Result{}, err + } + + if pg.Status.Phase == schedv1alpha1.PodGroupFinished || + pg.Status.Phase == schedv1alpha1.PodGroupFailed { + return ctrl.Result{}, nil + } + // If startScheduleTime - createTime > 2days, + // do not reconcile again because pod may have been GCed + if (pg.Status.Phase == schedv1alpha1.PodGroupScheduling || pg.Status.Phase == schedv1alpha1.PodGroupPending) && pg.Status.Running == 0 && + pg.Status.ScheduleStartTime.Sub(pg.CreationTimestamp.Time) > 48*time.Hour { + r.recorder.Event(pg, v1.EventTypeWarning, + "Timeout", "schedule time longer than 48 hours") + return ctrl.Result{}, nil + } + + podList := &v1.PodList{} + if err := r.List(ctx, podList, + client.MatchingLabelsSelector{ + Selector: labels.Set(map[string]string{ + schedv1alpha1.PodGroupLabel: pg.Name}).AsSelector(), + }); err != nil { + log.Error(err, "List pods for group failed") + return ctrl.Result{}, err + } + pods := podList.Items + + pgCopy := pg.DeepCopy() + switch pgCopy.Status.Phase { + case "": + pgCopy.Status.Phase = schedv1alpha1.PodGroupPending + case schedv1alpha1.PodGroupPending: + if len(pods) >= int(pg.Spec.MinMember) { + pgCopy.Status.Phase = schedv1alpha1.PodGroupScheduling + fillOccupiedObj(pgCopy, &pods[0]) + } + default: + pgCopy.Status.Running, pgCopy.Status.Succeeded, pgCopy.Status.Failed = getCurrentPodStats(pods) + if len(pods) < int(pg.Spec.MinMember) { + pgCopy.Status.Phase = schedv1alpha1.PodGroupPending + break + } + + if pgCopy.Status.Succeeded+pgCopy.Status.Running < pg.Spec.MinMember { + pgCopy.Status.Phase = schedv1alpha1.PodGroupScheduling + } + + if pgCopy.Status.Succeeded+pgCopy.Status.Running >= pg.Spec.MinMember { + pgCopy.Status.Phase = schedv1alpha1.PodGroupRunning + } + // Final state of pod group + if pgCopy.Status.Failed != 0 && + pgCopy.Status.Failed+pgCopy.Status.Running+pgCopy.Status.Succeeded >= pg.Spec.MinMember { + pgCopy.Status.Phase = schedv1alpha1.PodGroupFailed + } + if pgCopy.Status.Succeeded >= pg.Spec.MinMember { + pgCopy.Status.Phase = schedv1alpha1.PodGroupFinished + } + } + + return r.patchPodGroup(ctx, pg, pgCopy) +} + +func (r *PodGroupReconciler) patchPodGroup(ctx context.Context, old, new *schedv1alpha1.PodGroup) (ctrl.Result, error) { + patch := client.MergeFrom(old) + if err := r.Status().Patch(ctx, new, patch); err != nil { + return ctrl.Result{}, err + } + err := r.Patch(ctx, new, patch) + return ctrl.Result{}, err +} + +func getCurrentPodStats(pods []v1.Pod) (int32, int32, int32) { + if len(pods) == 0 { + return 0, 0, 0 + } + + var ( + running int32 = 0 + succeeded int32 = 0 + failed int32 = 0 + ) + for _, pod := range pods { + switch pod.Status.Phase { + case v1.PodRunning: + running++ + case v1.PodSucceeded: + succeeded++ + case v1.PodFailed: + failed++ + } + } + return running, succeeded, failed +} + +func fillOccupiedObj(pg *schedv1alpha1.PodGroup, pod *v1.Pod) { + if len(pod.OwnerReferences) == 0 { + return + } + + var refs []string + for _, 
ownerRef := range pod.OwnerReferences { + refs = append(refs, fmt.Sprintf("%s/%s", pod.Namespace, ownerRef.Name)) + } + if len(refs) != 0 { + sort.Strings(refs) + pg.Status.OccupiedBy = strings.Join(refs, ",") + } +} + +// SetupWithManager sets up the controller with the Manager. +func (r *PodGroupReconciler) SetupWithManager(mgr ctrl.Manager) error { + r.recorder = mgr.GetEventRecorderFor("PodGroupController") + r.log = mgr.GetLogger() + r.log.Info("setup with manager flux-framework/fluence-controller") + + return ctrl.NewControllerManagedBy(mgr). + Watches(&v1.Pod{}, handler.EnqueueRequestsFromMapFunc(r.podToPodGroup)). + For(&schedv1alpha1.PodGroup{}). + WithOptions(controller.Options{MaxConcurrentReconciles: r.Workers}). + Complete(r) +} + +// podToPodGroup is a watcher that looks for pods and associated pod group +func (r *PodGroupReconciler) podToPodGroup(ctx context.Context, obj client.Object) []ctrl.Request { + + pod, ok := obj.(*v1.Pod) + if !ok { + return nil + } + r.log.Info("podToPodGroup flux-framework/fluence-controller") + r.log.V(5).Info("Running podToPodGroup", "pod", pod.Name, "namespace", pod.Namespace) + pgName := util.GetPodGroupLabel(pod) + if len(pgName) == 0 { + return nil + } + + r.log.V(5).Info("Add pod group when pod gets added", "podGroup", pgName, "pod", pod.Name, "namespace", pod.Namespace) + + return []ctrl.Request{{ + NamespacedName: types.NamespacedName{ + Namespace: pod.Namespace, + Name: pgName, + }}} +} From 41b2ad284e92ebe98048fd6acf15babb5dc2f053 Mon Sep 17 00:00:00 2001 From: vsoch Date: Sat, 17 Feb 2024 01:09:31 -0700 Subject: [PATCH 10/28] docker: simplify fluence build to use fluxion-go Signed-off-by: vsoch --- src/Makefile | 8 +- src/build/scheduler/Dockerfile | 129 +++------------------------------ src/fluence/fluxion/fluxion.go | 2 +- src/fluence/go.mod | 4 +- src/fluence/go.sum | 4 +- 5 files changed, 21 insertions(+), 126 deletions(-) diff --git a/src/Makefile b/src/Makefile index 3392add..af5fcb3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,12 +1,12 @@ -FLUX_SCHED_ROOT ?= /home/flux-sched +FLUX_SCHED_ROOT ?= /opt/flux-sched INSTALL_PREFIX ?= /usr +LIB_PREFIX ?= /usr/lib LOCALBIN ?= $(shell pwd)/bin COMMONENVVAR=GOOS=$(shell uname -s | tr A-Z a-z) +#BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${INSTALL_PREFIX}/lib -L${FLUX_SCHED_ROOT}/resource -lresource -L${FLUX_SCHED_ROOT}/resource/libjobspec -ljobspec_conv -L/${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -lczmq -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp" +BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT} -I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${LIB_PREFIX} -L${LIB_PREFIX}/flux -L${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -lczmq -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp" -# This is what worked -# GOOS=linux CGO_CFLAGS="-I/home/flux-sched/resource/reapi/bindings/c" CGO_LDFLAGS="-L/usr/lib -L/home/flux-sched/resource -lresource -L/home/flux-sched/resource/libjobspec -ljobspec_conv -L/home/flux-sched/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -lczmq -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp" go build -ldflags '-w' -o bin/server cmd/main.go -BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${INSTALL_PREFIX}/lib -L${FLUX_SCHED_ROOT}/resource -lresource -L${FLUX_SCHED_ROOT}/resource/libjobspec -ljobspec_conv 
-L/${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -lczmq -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp" LOCAL_REGISTRY=localhost:5000 LOCAL_IMAGE=fluence-sidecar:latest diff --git a/src/build/scheduler/Dockerfile b/src/build/scheduler/Dockerfile index 15a9678..67bd5ce 100644 --- a/src/build/scheduler/Dockerfile +++ b/src/build/scheduler/Dockerfile @@ -1,138 +1,33 @@ -FROM ubuntu:latest as base +FROM fluxrm/flux-sched:jammy -RUN apt -y update && apt -y upgrade && apt -y clean && apt -y autoremove +USER root ENV DEBIAN_FRONTEND=noninteractive ENV GO_VERSION=1.19.10 -ENV INSTALL_PREFIX=/usr -RUN apt install -y --no-install-recommends tzdata && \ - apt -y --no-install-recommends install \ - aspell \ - aspell-en \ - autoconf \ - automake \ - curl \ - git \ - libc6-dev \ - libczmq-dev \ - libmpich-dev \ - libncurses5-dev \ - libelf-dev \ - libssl-dev \ - libtool \ - libsodium-dev \ - libzmq3-dev \ - libjansson-dev \ - liblz4-dev \ - libhwloc-dev \ - libsqlite3-dev \ - lua5.1 \ - liblua5.1-dev \ - lua-posix \ - make \ - openssh-client \ - python3-dev \ - python3-cffi \ - python3-six \ - python3-yaml \ - python3-jsonschema \ - python3-sphinx \ - python3-pip \ - python3-setuptools \ - systemd \ - wget \ - uuid-dev && \ - apt -y clean && apt -y autoremove - -RUN echo 'alias python="/usr/bin/python3.8"' >> /root/.bashrc && \ - echo 'alias pip="/usr/bin/pip3"' >> /root/.bashrc && \ - . /root/.bashrc - -RUN echo 'set number' >> /root/.vimrc - -# Install cmake for new build system -RUN curl -s -L https://github.com/Kitware/CMake/releases/download/v3.26.4/cmake-3.26.4-linux-$(uname -m).sh > cmake.sh ;\ - bash cmake.sh --prefix=/usr/local --skip-license ;\ - rm cmake.sh - -# Remove Python 2 -RUN apt purge -y python2.7-minimal - -# Python 3 should be linked to python -RUN ln -s /usr/bin/python3 /usr/bin/python -RUN apt install -y python3-pip \ - && apt -y --no-install-recommends install \ - libhwloc-dev \ - libboost-dev \ - libboost-system-dev \ - libboost-filesystem-dev \ - libboost-graph-dev \ - libboost-regex-dev \ - libxml2-dev \ - libyaml-cpp-dev \ - python3-yaml \ - libedit-dev \ - libarchive-dev \ - pkg-config && apt -y clean && apt -y autoremove - -RUN git clone https://github.com/flux-framework/flux-core.git /home/flux-core && \ - cd /home/flux-core/ && \ - ./autogen.sh && \ - PYTHON_VERSION=3 ./configure --prefix=${INSTALL_PREFIX} && \ - make && make install && \ - cd ../ && \ - rm -rf flux-core +RUN apt-get update && apt-get clean -y && apt -y autoremove # Install go 19.10 RUN wget https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz && tar -xvf go${GO_VERSION}.linux-amd64.tar.gz && \ mv go /usr/local && rm go${GO_VERSION}.linux-amd64.tar.gz -ENV GOROOT=/usr/local/go -ENV GOPATH=/go -ENV PATH="$GOROOT/bin:$PATH" -RUN mkdir -p /go/src +# ENV GOROOT=/usr/local/go +# ENV GOPATH=/go +ENV PATH=/usr/local/go/bin:$PATH RUN flux keygen +RUN git clone https://github.com/flux-framework/flux-sched.git /opt/flux-sched -ENV WITH_GO=yes -RUN git clone https://github.com/flux-framework/flux-sched.git /home/flux-sched && \ - cd /home/flux-sched/ && \ - # Ensure we pin to variant that has STATIC - will update when fix is in - git fetch && git checkout v0.31.0 && \ - # These need to be shared libraries - # https://github.com/flux-framework/flux-sched/pull/1094 - sed -i 's/add_library(resource STATIC/add_library(resource SHARED/g' resource/CMakeLists.txt && \ - sed -i 's/add_library ( reapi_module STATIC/add_library ( reapi_module SHARED/g' 
resource/reapi/bindings/CMakeLists.txt && \ - sed -i 's/add_library ( reapi_cli STATIC/add_library ( reapi_cli SHARED/g' resource/reapi/bindings/CMakeLists.txt && \ - sed -i 's/add_library ( jobspec_conv STATIC/add_library ( jobspec_conv SHARED/g' resource/libjobspec/CMakeLists.txt && \ - PYTHON_VERSION=3 ./configure --prefix=${INSTALL_PREFIX} && \ - make && make install - -RUN apt purge -y \ - python3-dev \ - python3-cffi \ - python3-six \ - python3-yaml \ - python3-jsonschema \ - python3-sphinx \ - python3-pip \ - python3-setuptools \ - && apt -y clean && apt -y autoremove - -ENV PATH=/usr/local/go/bin:$PATH +# Go dependencies for protobuf RUN apt -y update && apt -y upgrade && apt install --no-install-recommends -y protobuf-compiler curl && \ go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26 && \ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1 # These need to be on the LD_LIBRARY_PATH for the server to find at runtime -# This mimcs what we use to build server -ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/lib:/home/flux-sched/resource:/home/flux-sched/resource/libjobspec:/home/flux-sched/resource/reapi/bindings" -COPY fluence Makefile /go/src/fluence/ +ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/lib:/usr/lib/flux WORKDIR /go/src/fluence/ +COPY fluence Makefile /go/src/fluence/ -# This is the 0.31.0 tag of flux-sched (same as we install above) -RUN go get -u github.com/flux-framework/flux-sched/resource/reapi/bindings/go/src/fluxcli@250eac78a6753253fc8353a3504d7e843d1b6b24 && \ - go mod tidy && \ +RUN go mod tidy && \ go mod vendor && \ - make server FLUX_SCHED_ROOT=/home/flux-sched INSTALL_PREFIX=${INSTALL_PREFIX} && \ + make server FLUX_SCHED_ROOT=/opt/flux-sched && \ mkdir -p /home/data/jobspecs /home/data/jgf && \ chmod -R ugo+rwx /home/data \ No newline at end of file diff --git a/src/fluence/fluxion/fluxion.go b/src/fluence/fluxion/fluxion.go index f29ac62..5775199 100644 --- a/src/fluence/fluxion/fluxion.go +++ b/src/fluence/fluxion/fluxion.go @@ -7,7 +7,7 @@ import ( pb "github.com/flux-framework/flux-k8s/flux-plugin/fluence/fluxcli-grpc" "github.com/flux-framework/flux-k8s/flux-plugin/fluence/jobspec" "github.com/flux-framework/flux-k8s/flux-plugin/fluence/utils" - "github.com/flux-framework/flux-sched/resource/reapi/bindings/go/src/fluxcli" + "github.com/flux-framework/fluxion-go/pkg/fluxcli" "k8s.io/klog/v2" "context" diff --git a/src/fluence/go.mod b/src/fluence/go.mod index 5409a2a..5c57652 100644 --- a/src/fluence/go.mod +++ b/src/fluence/go.mod @@ -3,13 +3,14 @@ module github.com/flux-framework/flux-k8s/flux-plugin/fluence go 1.19 require ( - github.com/flux-framework/flux-sched/resource/reapi/bindings/go v0.0.0-20231213021445-250eac78a675 + github.com/flux-framework/fluxion-go v0.32.0 google.golang.org/grpc v1.38.0 google.golang.org/protobuf v1.26.0 gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.22.3 k8s.io/apimachinery v0.22.3 k8s.io/client-go v0.22.3 + k8s.io/klog/v2 v2.9.0 k8s.io/kubectl v0.0.0 ) @@ -34,7 +35,6 @@ require ( google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect - k8s.io/klog/v2 v2.9.0 // indirect k8s.io/utils v0.0.0-20210819203725-bdf08cb9a70a // indirect sigs.k8s.io/structured-merge-diff/v4 v4.1.2 // indirect sigs.k8s.io/yaml v1.2.0 // indirect diff --git a/src/fluence/go.sum b/src/fluence/go.sum index 19e571c..5700215 100644 --- a/src/fluence/go.sum +++ b/src/fluence/go.sum @@ -98,8 +98,8 @@ 
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZM github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/flux-framework/flux-sched/resource/reapi/bindings/go v0.0.0-20231213021445-250eac78a675 h1:FgEA3pnL/kDoLaVOUDa401yainApQJaow9jeBPg4dek= -github.com/flux-framework/flux-sched/resource/reapi/bindings/go v0.0.0-20231213021445-250eac78a675/go.mod h1:yhmzNyn45YhoxEohh1Sl3h3izLMqL7qpcvmYTRpv7eY= +github.com/flux-framework/fluxion-go v0.32.0 h1:NY6Y1mlTTTZhHD+CmAsDsdNTxUsAFDQoORpMZj8NFLI= +github.com/flux-framework/fluxion-go v0.32.0/go.mod h1:ZI3QxSvUfgJE2Snur/SntJmVfpMjr6D4ICVmdqJ9fkQ= github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= From 1c0e5a32983d22f8f4081457022bd73a4fc351ff Mon Sep 17 00:00:00 2001 From: vsoch Date: Sat, 17 Feb 2024 16:06:40 -0700 Subject: [PATCH 11/28] ci: add support to build and deploy fluence-controller Signed-off-by: vsoch --- .github/test.sh | 2 ++ .github/workflows/build-deploy.yaml | 43 +++++++++++++++++++++++++-- .github/workflows/test.yaml | 45 ++++++++++++++++++++++------- 3 files changed, 77 insertions(+), 13 deletions(-) diff --git a/.github/test.sh b/.github/test.sh index 44314ad..2b8b1e6 100644 --- a/.github/test.sh +++ b/.github/test.sh @@ -18,6 +18,8 @@ cd upstream/manifests/install/charts helm install \ --set scheduler.image=ghcr.io/flux-framework/fluence:latest \ --set scheduler.sidecarimage=ghcr.io/flux-framework/fluence-sidecar:latest \ + --set controller.image=ghcr.io/flux-framework/fluence-controller:latest \ + --set controller.pullPolicy=Never \ --set scheduler.pullPolicy=Never \ --set scheduler.sidecarPullPolicy=Never \ schedscheduler-plugins as-a-second-scheduler/ diff --git a/.github/workflows/build-deploy.yaml b/.github/workflows/build-deploy.yaml index c993aa9..575d2db 100644 --- a/.github/workflows/build-deploy.yaml +++ b/.github/workflows/build-deploy.yaml @@ -18,7 +18,7 @@ jobs: name: build fluence steps: - uses: actions/checkout@v4 - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v4 with: go-version: ^1.19 @@ -45,7 +45,44 @@ jobs: - name: Deploy Container if: (github.event_name != 'pull_request') run: docker push ${{ env.container }} --all-tags - + + build-controller: + permissions: + packages: write + env: + container: ghcr.io/flux-framework/fluence-controller + runs-on: ubuntu-latest + name: build fluence-controller + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v4 + with: + go-version: ^1.19 + + - name: Build Containers + run: | + make prepare + make build REGISTRY=ghcr.io/flux-framework CONTROLLER_IMAGE=fluence-controller + + - name: Tag Release Image + if: (github.event_name == 'release') + run: | + tag=${GITHUB_REF#refs/tags/} + echo "Tagging and releasing ${{ env.container}}:${tag}" + docker tag ${{ env.container }}:latest ${{ env.container }}:${tag} + + - name: GHCR Login + if: (github.event_name != 'pull_request') + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Deploy Container + if: (github.event_name != 
'pull_request') + run: docker push ${{ env.container }} --all-tags + build-sidecar: permissions: packages: write @@ -55,7 +92,7 @@ jobs: name: build sidecar steps: - uses: actions/checkout@v4 - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v4 with: go-version: ^1.19 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 3e24a33..98e2de2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -11,30 +11,42 @@ on: jobs: build-fluence: + + # The scheduler and controller are built together with the hack script + # in the upstream scheduler-plugins env: container: ghcr.io/flux-framework/fluence + controller: ghcr.io/flux-framework/fluence-controller runs-on: ubuntu-latest name: build fluence steps: - uses: actions/checkout@v4 - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v4 with: go-version: ^1.19 - name: Build Containers run: | make prepare - make build REGISTRY=ghcr.io/flux-framework SCHEDULER_IMAGE=fluence + make build REGISTRY=ghcr.io/flux-framework SCHEDULER_IMAGE=fluence CONTROLLER_IMAGE=fluence-controller - - name: Save Container - run: docker save ${{ env.container }} | gzip > fluence_latest.tar.gz + - name: Save Containers + run: | + docker save ${{ env.container }} | gzip > fluence_latest.tar.gz + docker save ${{ env.controller }} | gzip > fluence_controller_latest.tar.gz - name: Upload container artifact uses: actions/upload-artifact@v4 with: name: fluence path: fluence_latest.tar.gz - + + - name: Upload container artifact + uses: actions/upload-artifact@v4 + with: + name: fluence_controller + path: fluence_controller_latest.tar.gz + build-sidecar: env: container: ghcr.io/flux-framework/fluence-sidecar @@ -42,7 +54,7 @@ jobs: name: build sidecar steps: - uses: actions/checkout@v4 - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v4 with: go-version: ^1.19 @@ -59,7 +71,7 @@ jobs: with: name: fluence_sidecar path: fluence_sidecar_latest.tar.gz - + test-fluence: needs: [build-fluence, build-sidecar] permissions: @@ -67,14 +79,15 @@ jobs: env: fluence_container: ghcr.io/flux-framework/fluence sidecar_container: ghcr.io/flux-framework/fluence-sidecar + controller_container: ghcr.io/flux-framework/fluence-controller runs-on: ubuntu-latest - name: build fluence + name: test fluence steps: - uses: actions/checkout@v4 - - uses: actions/setup-go@v3 + - uses: actions/setup-go@v4 with: - go-version: ^1.20 + go-version: ^1.19 - name: Download fluence artifact uses: actions/download-artifact@v4 @@ -88,11 +101,18 @@ jobs: name: fluence_sidecar path: /tmp + - name: Download fluence_controller artifact + uses: actions/download-artifact@v4 + with: + name: fluence_controller + path: /tmp + - name: Load Docker images run: | ls /tmp/*.tar.gz docker load --input /tmp/fluence_sidecar_latest.tar.gz docker load --input /tmp/fluence_latest.tar.gz + docker load --input /tmp/fluence_controller_latest.tar.gz docker image ls -a | grep fluence - name: Create Kind Cluster @@ -106,10 +126,12 @@ jobs: env: fluence: ${{ env.fluence_container }} sidecar: ${{ env.sidecar_container }} + controller: ${{ env.controller_container }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | kind load docker-image ${fluence} kind load docker-image ${sidecar} + kind load docker-image ${controller} - name: Test Fluence run: /bin/bash ./.github/test.sh @@ -122,6 +144,8 @@ jobs: docker tag ${{ env.fluence_container }}:latest ${{ env.fluence_container }}:${tag} echo "Tagging and releasing ${{ env.sidecar_container}}:${tag}" docker tag ${{ env.sidecar_container }}:latest 
${{ env.sidecar_container }}:${tag}
+          echo "Tagging and releasing ${{ env.controller_container}}:${tag}"
+          docker tag ${{ env.controller_container }}:latest ${{ env.controller_container }}:${tag}
 
       # If we get here, tests pass, and we can deploy
       - name: GHCR Login
@@ -137,3 +161,4 @@
         run: |
           docker push ${{ env.fluence_container }} --all-tags
           docker push ${{ env.sidecar_container }} --all-tags
+          docker push ${{ env.controller_container }} --all-tags
\ No newline at end of file

From 8add1e0f4df2f8adb2febc5265a8f3a01ffb0787 Mon Sep 17 00:00:00 2001
From: vsoch
Date: Sat, 17 Feb 2024 13:44:36 -0700
Subject: [PATCH 12/28] feat: add start of webhook

Problem: we want to try a design where a mutating admission webhook
can handle receiving and creating PodGroup from labels. We are choosing
mutating with the expectation that, at some point, we might be able to
change the size (min/max/desired) either for the PodGroup or some other
watcher to jobs. Note that this is an empty skeleton - the webhook is
added and running but basically doing nothing. I am also fixing a bug
that I noticed while running kind, where fluence was assigning work to
the control plane. I think there may have been logic (a commented out
worker label) that was anticipating doing a check for a control plane,
but it looks like on production clusters we do not always have access
and it was never finished. Note that this addition does not guarantee
this design will work, but it is just one step. Since the helm charts
are manually generated for the scheduler-plugin (as far as I can tell)
this took me almost 6 hours to figure out and get working. I am really
starting to think there is no skill behind software engineering beyond
absolute patience.

Signed-off-by: vsoch
---
 .github/test-kind-config.yaml | 5 +
 .github/workflows/test.yaml | 15 ++
 Makefile | 8 +-
 README.md | 26 ++-
 .../simple_example/fluence-scheduler-pod.yaml | 2 +-
 .../scheduling/v1alpha1/podgroup_webhook.go | 190 ++++++++++++++++++
 .../cmd/controller/app/server.go | 101 ++++++++++
 .../crds/scheduling.x-k8s.io_podgroups.yaml | 108 ++++++++++
 .../templates/deployment.yaml | 13 ++
 .../mutating-webhook-configuration.yaml | 40 ++++
 .../templates/selfsigned-issuer.yaml | 10 +
 .../templates/serving-cert.yaml | 17 ++
 .../templates/webhook-service.yaml | 15 ++
 .../charts/as-a-second-scheduler/values.yaml | 10 +-
 src/fluence/utils/utils.go | 18 +-
 15 files changed, 565 insertions(+), 13 deletions(-)
 create mode 100644 .github/test-kind-config.yaml
 create mode 100644 sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go
 create mode 100644 sig-scheduler-plugins/cmd/controller/app/server.go
 create mode 100644 sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/crds/scheduling.x-k8s.io_podgroups.yaml
 create mode 100644 sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml
 create mode 100644 sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/selfsigned-issuer.yaml
 create mode 100644 sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/serving-cert.yaml
 create mode 100644 sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/webhook-service.yaml

diff --git a/.github/test-kind-config.yaml b/.github/test-kind-config.yaml
new file mode 100644
index 0000000..0fe29e7
--- /dev/null
+++ b/.github/test-kind-config.yaml
@@ -0,0 +1,5 @@
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+nodes:
+- role: control-plane
+- 
role: worker \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 98e2de2..ed45891 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -107,12 +107,21 @@ jobs: name: fluence_controller path: /tmp + - name: Make Space For Build + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + - name: Load Docker images run: | ls /tmp/*.tar.gz docker load --input /tmp/fluence_sidecar_latest.tar.gz + rm /tmp/fluence_sidecar_latest.tar.gz docker load --input /tmp/fluence_latest.tar.gz + rm /tmp/fluence_latest.tar.gz docker load --input /tmp/fluence_controller_latest.tar.gz + rm /tmp/fluence_controller_latest.tar.gz docker image ls -a | grep fluence - name: Create Kind Cluster @@ -121,6 +130,7 @@ jobs: cluster_name: kind kubectl_version: v1.28.2 version: v0.20.0 + config: ./.github/test-kind-config.yaml - name: Load Docker Containers into Kind env: @@ -133,6 +143,11 @@ jobs: kind load docker-image ${sidecar} kind load docker-image ${controller} + - name: Install Cert Manager + run: | + kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.1/cert-manager.yaml + sleep 10 + - name: Test Fluence run: /bin/bash ./.github/test.sh diff --git a/Makefile b/Makefile index 97efa75..8976cb4 100644 --- a/Makefile +++ b/Makefile @@ -27,15 +27,19 @@ prepare: clone # These are entirely new directory structures rm -rf $(CLONE_UPSTREAM)/pkg/fluence rm -rf $(CLONE_UPSTREAM)/pkg/controllers/podgroup_controller.go - rm -rf $(CLONE_UPSTREAM)/manifests/fluence + rm -rf $(CLONE_UPSTREAM)/apis/scheduling/v1alpha1/podgroup_webhook.go + rm -rf $(CLONE_UPSTREAM)/cmd/controller/app/server.go cp -R sig-scheduler-plugins/pkg/fluence $(CLONE_UPSTREAM)/pkg/fluence cp -R sig-scheduler-plugins/pkg/controllers/* $(CLONE_UPSTREAM)/pkg/controllers/ # This is the one exception not from sig-scheduler-plugins because it is needed in both spots cp -R src/fluence/fluxcli-grpc $(CLONE_UPSTREAM)/pkg/fluence/fluxcli-grpc # These are files with subtle changes to add fluence cp sig-scheduler-plugins/cmd/scheduler/main.go ./upstream/cmd/scheduler/main.go - cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml + cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/*.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/templates/ + cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/crds/*.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/crds/ cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/values.yaml + cp sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go $(CLONE_UPSTREAM)/apis/scheduling/v1alpha1/podgroup_webhook.go + cp sig-scheduler-plugins/cmd/controller/app/server.go $(CLONE_UPSTREAM)/cmd/controller/app/server.go build: prepare REGISTRY=${REGISTRY} IMAGE=${SCHEDULER_IMAGE} CONTROLLER_IMAGE=${CONTROLLER_IMAGE} $(BASH) $(CLONE_UPSTREAM)/hack/build-images.sh diff --git a/README.md b/README.md index f0d67cd..3821ad8 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Fluence enables HPC-grade pod scheduling in Kubernetes via the [Kubernetes Sched ## Getting started -For instructions on how to start Fluence on a K8s cluster, see [examples](examples/). 
Documentation and instructions for reproducing our CANOPIE2022 paper (citation below) can be found in the [canopie22-artifacts branch](https://github.com/flux-framework/flux-k8s/tree/canopie22-artifacts). +For instructions on how to start Fluence on a K8s cluster, see [examples](examples/). Documentation and instructions for reproducing our CANOPIE-2022 paper (citation below) can be found in the [canopie22-artifacts branch](https://github.com/flux-framework/flux-k8s/tree/canopie22-artifacts). For background on the Flux framework and the Fluxion scheduler, you can take a look at our award-winning R&D100 submission: https://ipo.llnl.gov/sites/default/files/2022-02/Flux_RD100_Final.pdf. For next steps: - To understand how it works, see [Design](#design) @@ -56,8 +56,13 @@ pods with different names cannot be part of the same group that needs to be sche ### Deploy We provide a set of pre-build containers [alongside the repository](https://github.com/orgs/flux-framework/packages?repo_name=flux-k8s) -that you can easily use to deploy Fluence right away! You'll simply need to clone the proper helm charts, and then install to your cluster. -We provide helper commands to do that. +that you can easily use to deploy Fluence right away! You'll first need to install the certificate manager: + +```bash +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.1/cert-manager.yaml +``` + +And then clone the proper helm charts, and then install to your cluster. We provide helper commands to do that. ```bash # This clones the upstream scheduler plugins code, we will add fluence to it! @@ -131,7 +136,13 @@ docker push docker.io/vanessa/fluence-controller These steps will require a Kubernetes cluster to install to, and having pushed the plugin container to a registry. If you aren't using a cloud provider, you can create a local one with `kind`: ```bash -kind create cluster +kind create cluster --config ./examples/kind-config.yaml +``` + +And install the certificate manager: + +```bash +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.1/cert-manager.yaml ``` **Important** if you are developing or testing fluence, note that custom scheduler plugins don't seem to work out of the box with MiniKube (but everything works with kind). Likely there are extensions or similar that need to be configured with MiniKube (that we have not looked into). @@ -456,7 +467,7 @@ Note that if you want to enable extra endpoints for the fluence kubectl plugin a helm install \ --set scheduler.image=ghcr.io/vsoch/fluence:latest \ --set scheduler.enableExternalService=true \ - --set controller.image=vanessa/fluence-controller \ + --set controller.image=ghcr.io/vsoch/fluence-controller \ --set scheduler.sidecarimage=ghcr.io/vsoch/fluence-sidecar:latest \ fluence as-a-second-scheduler/ ``` @@ -486,6 +497,11 @@ And to create: kind create cluster --config ./kind-config.yaml ``` +#### TODO + + - Try what [kueue does](https://github.com/kubernetes-sigs/kueue/blob/6d57813a52066dab412735deeeb60ebb0cdb8e8e/cmd/kueue/main.go#L146-L155) to not require cert-manager. + - Possible bug with using kind (with custom config we are scheduling things to the control plane) - need to verify this didn't start happening with mutating webhook addition. 
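Until the cert-manager item above is addressed, the webhook serving certificates come from cert-manager, so it has to be healthy before the chart is installed. One way to check (a suggested readiness check, not something the charts enforce) is:

```bash
# Wait for the cert-manager deployments to become Available before installing fluence
kubectl wait --for=condition=Available --timeout=120s deployment --all -n cert-manager
```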
+ #### Vanessa Thinking > Updated February 15, 2024 diff --git a/examples/simple_example/fluence-scheduler-pod.yaml b/examples/simple_example/fluence-scheduler-pod.yaml index a7cc126..71a8463 100644 --- a/examples/simple_example/fluence-scheduler-pod.yaml +++ b/examples/simple_example/fluence-scheduler-pod.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Pod metadata: - name: fluence-scheduled-pod + name: fluence-scheduled-pod-1 labels: name: scheduler-example spec: diff --git a/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go new file mode 100644 index 0000000..55c4d45 --- /dev/null +++ b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go @@ -0,0 +1,190 @@ +/* +Copyright 2023 Lawrence Livermore National Security, LLC + +(c.f. AUTHORS, NOTICE.LLNS, COPYING) +SPDX-License-Identifier: MIT +*/ + +// This file is not used, but maintained as the original addition of an OrasCache webhook + +package v1alpha1 + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" +) + +var ( + logger = ctrl.Log.WithName("setup") +) + +// IMPORTANT: if you use the controller-runtime builder, it will derive this name automatically from the gvk (kind, version, etc. so find the actual created path in the logs) +// kubectl describe mutatingwebhookconfigurations.admissionregistration.k8s.io +// It will also only allow you to describe one object type with For() +// This is disabled so we manually manage it - multiple types to a list did not work: config/webhook/manifests.yaml +////kubebuilder:webhook:path=/mutate-v1-sidecar,mutating=true,failurePolicy=fail,sideEffects=None,groups=core;batch,resources=pods;jobs,verbs=create,versions=v1,name=morascache.kb.io,admissionReviewVersions=v1 + +// NewMutatingWebhook allows us to keep the sidecarInjector private +// If it's public it's exported and kubebuilder tries to add to zz_generated_deepcopy +// and you get all kinds of terrible errors about admission.Decoder missing DeepCopyInto +func NewMutatingWebhook(mgr manager.Manager) *fluenceWatcher { + return &fluenceWatcher{decoder: admission.NewDecoder(mgr.GetScheme())} +} + +// mutate-v1-fluence + +type fluenceWatcher struct { + decoder *admission.Decoder +} + +func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admission.Response { + + logger.Info("Running webhook handle") + // First try for job + job := &batchv1.Job{} + err := a.decoder.Decode(req, job) + if err != nil { + + // Try for a pod next + pod := &corev1.Pod{} + err := a.decoder.Decode(req, pod) + if err != nil { + logger.Error(err, "Admission error.") + return admission.Errored(http.StatusBadRequest, err) + } + + // If we get here, we decoded a pod + /*err = a.InjectPod(pod) + if err != nil { + logger.Error("Inject pod error.", err) + return admission.Errored(http.StatusBadRequest, err) + }*/ + + // Mutate the fields in pod + marshalledPod, err := json.Marshal(pod) + if err != nil { + logger.Error(err, "Marshalling pod error.") + return admission.Errored(http.StatusInternalServerError, err) + } + logger.Info("Admission pod success.") + return admission.PatchResponseFromRaw(req.Object.Raw, marshalledPod) + } + /* + // If we get here, we found a job + err = a.InjectJob(job) + + if err != nil { + 
logger.Error("Inject job error.", err) + return admission.Errored(http.StatusBadRequest, err) + }*/ + + marshalledJob, err := json.Marshal(job) + + if err != nil { + logger.Error(err, "Marshalling job error.") + return admission.Errored(http.StatusInternalServerError, err) + } + + logger.Info("Admission job success.") + return admission.PatchResponseFromRaw(req.Object.Raw, marshalledJob) +} + +// Default is the expected entrypoint for a webhook +func (a *fluenceWatcher) Default(ctx context.Context, obj runtime.Object) error { + pod, ok := obj.(*corev1.Pod) + if !ok { + job, ok := obj.(*batchv1.Job) + if !ok { + return fmt.Errorf("expected a Pod or Job but got a %T", obj) + } + logger.Info(fmt.Sprintf("Job %s is marked for fluence.", job.Name)) + return nil + // return a.InjectJob(job) + } + logger.Info(fmt.Sprintf("Pod %s is marked for fluence.", pod.Name)) + return nil + //return a.InjectPod(pod) +} + +// InjectPod adds the sidecar container to a pod +func (a *fluenceWatcher) InjectPod(pod *corev1.Pod) error { + + /* + // Cut out early if we have no labels + if pod.Annotations == nil { + logger.Info(fmt.Sprintf("Pod %s is not marked for oras storage.", pod.Name)) + return nil + } + + // Parse oras known labels into settings + settings := orasSettings.NewOrasCacheSettings(pod.Annotations) + + // Cut out early if no oras identifiers! + if !settings.MarkedForOras { + logger.Warnf("Pod %s is not marked for oras storage.", pod.Name) + return nil + } + + // Validate, return error if no good here. + if !settings.Validate() { + logger.Warnf("Pod %s oras storage did not validate.", pod.Name) + return fmt.Errorf("oras storage was requested but is not valid") + } + + // The selector for the namespaced registry is the namespace + if pod.Labels == nil { + pod.Labels = map[string]string{} + } + + // Even pods without say, the launcher, that are marked should have the network added + pod.Labels[defaults.OrasSelectorKey] = pod.ObjectMeta.Namespace + oras.AddSidecar(&pod.Spec, pod.ObjectMeta.Namespace, settings) + logger.Info(fmt.Sprintf("Pod %s is marked for oras storage.", pod.Name))*/ + return nil +} + +// InjectJob adds the sidecar container to the PodTemplateSpec of the Job +func (a *fluenceWatcher) InjectJob(job *batchv1.Job) error { + + /* + // Cut out early if we have no labels + if job.Annotations == nil { + logger.Info(fmt.Sprintf("Job %s is not marked for oras storage.", job.Name)) + return nil + } + + // Parse oras known labels into settings + settings := orasSettings.NewOrasCacheSettings(job.Annotations) + + // Cut out early if no oras identifiers! + if !settings.MarkedForOras { + logger.Warnf("Job %s is not marked for oras storage.", job.Name) + return nil + } + + // Validate, return error if no good here. 
+ if !settings.Validate() { + logger.Warnf("Job %s oras storage did not validate.", job.Name) + return fmt.Errorf("oras storage was requested but is not valid") + } + + // Add the sidecar to the podspec of the job + if job.Spec.Template.Labels == nil { + job.Spec.Template.Labels = map[string]string{} + } + + // Add network to spec template so all pods are targeted + job.Spec.Template.Labels[defaults.OrasSelectorKey] = job.ObjectMeta.Namespace + oras.AddSidecar(&job.Spec.Template.Spec, job.ObjectMeta.Namespace, settings) + logger.Info(fmt.Sprintf("Job %s is marked for oras storage.", job.Name))*/ + return nil +} diff --git a/sig-scheduler-plugins/cmd/controller/app/server.go b/sig-scheduler-plugins/cmd/controller/app/server.go new file mode 100644 index 0000000..5927bec --- /dev/null +++ b/sig-scheduler-plugins/cmd/controller/app/server.go @@ -0,0 +1,101 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package app + +import ( + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2/klogr" + + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" + + "sigs.k8s.io/controller-runtime/pkg/webhook" + api "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" + "sigs.k8s.io/scheduler-plugins/pkg/controllers" +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + + utilruntime.Must(api.AddToScheme(scheme)) +} + +func Run(s *ServerRunOptions) error { + config := ctrl.GetConfigOrDie() + config.QPS = float32(s.ApiServerQPS) + config.Burst = s.ApiServerBurst + + // Controller Runtime Controllers + ctrl.SetLogger(klogr.New()) + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + MetricsBindAddress: s.MetricsAddr, + Port: 9443, + HealthProbeBindAddress: s.ProbeAddr, + LeaderElection: s.EnableLeaderElection, + LeaderElectionID: "sched-plugins-controllers", + LeaderElectionNamespace: "kube-system", + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + return err + } + + if err = (&controllers.PodGroupReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Workers: s.Workers, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "PodGroup") + return err + } + + mgr.GetWebhookServer().Register("/mutate-v1-fluence", &webhook.Admission{ + Handler: api.NewMutatingWebhook(mgr), + }) + + if err = (&controllers.ElasticQuotaReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Workers: s.Workers, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ElasticQuota") + return err + } + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + return err + } + if err := mgr.AddReadyzCheck("readyz", 
healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + return err + } + + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err, "unable to start manager") + return err + } + return nil +} diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/crds/scheduling.x-k8s.io_podgroups.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/crds/scheduling.x-k8s.io_podgroups.yaml new file mode 100644 index 0000000..d633b7d --- /dev/null +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/crds/scheduling.x-k8s.io_podgroups.yaml @@ -0,0 +1,108 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + api-approved.kubernetes.io: https://github.com/kubernetes-sigs/scheduler-plugins/pull/50 + controller-gen.kubebuilder.io/version: v0.11.1 + # TODO this needs if .Vaues.enableCertManager added back + cert-manager.io/inject-ca-from: '{{ .Release.Namespace }}/{{ include "scheduler-plugins-as-a-second-scheduler.fullname" . }}-serving-cert' + creationTimestamp: null + name: podgroups.scheduling.x-k8s.io +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: webhook-service + namespace: system + path: /convert + conversionReviewVersions: + - v1 + group: scheduling.x-k8s.io + names: + kind: PodGroup + listKind: PodGroupList + plural: podgroups + shortNames: + - pg + - pgs + singular: podgroup + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: PodGroup is a collection of Pod; used for batch workload. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: Specification of the desired behavior of the pod group. + properties: + minMember: + description: MinMember defines the minimal number of members/tasks + to run the pod group; if there's not enough resources to start all + tasks, the scheduler will not start anyone. + format: int32 + type: integer + minResources: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: MinResources defines the minimal resource of members/tasks + to run the pod group; if there's not enough resources to start all + tasks, the scheduler will not start anyone. + type: object + scheduleTimeoutSeconds: + description: ScheduleTimeoutSeconds defines the maximal time of members/tasks + to wait before run the pod group; + format: int32 + type: integer + type: object + status: + description: Status represents the current information about a pod group. + This data may not be up to date. + properties: + failed: + description: The number of pods which reached phase Failed. 
+ format: int32 + type: integer + occupiedBy: + description: OccupiedBy marks the workload (e.g., deployment, statefulset) + UID that occupy the podgroup. It is empty if not initialized. + type: string + phase: + description: Current phase of PodGroup. + type: string + running: + description: The number of actively running pods. + format: int32 + type: integer + scheduleStartTime: + description: ScheduleStartTime of the group + format: date-time + type: string + succeeded: + description: The number of pods which reached phase Succeeded. + format: int32 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml index 83ecccc..289a0e5 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/deployment.yaml @@ -20,6 +20,19 @@ spec: - name: scheduler-plugins-controller image: {{ .Values.controller.image }} imagePullPolicy: {{ .Values.controller.pullPolicy }} + ports: + - containerPort: 9443 + name: webhook-server + protocol: TCP + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: webhook-server-cert --- apiVersion: apps/v1 kind: Deployment diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml new file mode 100644 index 0000000..d6e7330 --- /dev/null +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml @@ -0,0 +1,40 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: {{ include "scheduler-plugins-as-a-second-scheduler.name" . }}-mutating-webhook-configuration + {{- if .Values.enableCertManager }} + annotations: + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "scheduler-plugins-as-a-second-scheduler.fullname" . }}-serving-cert + {{- end}} + labels: + {{- include "scheduler-plugins-as-a-second-scheduler.labels" . | nindent 4 }} +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "scheduler-plugins-as-a-second-scheduler.fullname" . 
}}-webhook-service' + namespace: '{{ .Release.Namespace }}' + path: /mutate-v1-fluence + {{- with (index .Values.webhookService.ports 0) }} + port: {{ .port }} + {{- end }} + + failurePolicy: Fail + name: morascache.kb.io + rules: + - apiGroups: + - "" + - core + - batch + - scheduling.x-k8s.io + apiVersions: + - v1 + - v1alpha1 + operations: + - CREATE + resources: + - pods + - jobs + - podgroups + sideEffects: None diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/selfsigned-issuer.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/selfsigned-issuer.yaml new file mode 100644 index 0000000..aa4d0a1 --- /dev/null +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/selfsigned-issuer.yaml @@ -0,0 +1,10 @@ +{{- if .Values.enableCertManager }} +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ include "scheduler-plugins-as-a-second-scheduler.fullname" . }}-selfsigned-issuer + labels: + {{- include "scheduler-plugins-as-a-second-scheduler.labels" . | nindent 4 }} +spec: + selfSigned: {} +{{- end}} diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/serving-cert.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/serving-cert.yaml new file mode 100644 index 0000000..0edefe2 --- /dev/null +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/serving-cert.yaml @@ -0,0 +1,17 @@ +{{- if .Values.enableCertManager }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "scheduler-plugins-as-a-second-scheduler.fullname" . }}-serving-cert + labels: + {{- include "scheduler-plugins-as-a-second-scheduler.labels" . | nindent 4 }} +spec: + dnsNames: + - '{{ include "scheduler-plugins-as-a-second-scheduler.fullname" . }}-webhook-service.{{ .Release.Namespace }}.svc' + - '{{ include "scheduler-plugins-as-a-second-scheduler.fullname" . }}-webhook-service.{{ .Release.Namespace }}.svc.{{ + .Values.kubernetesClusterDomain }}' + issuerRef: + kind: Issuer + name: '{{ include "scheduler-plugins-as-a-second-scheduler.fullname" . }}-selfsigned-issuer' + secretName: webhook-server-cert +{{- end}} diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/webhook-service.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/webhook-service.yaml new file mode 100644 index 0000000..bedfb95 --- /dev/null +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/webhook-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "scheduler-plugins-as-a-second-scheduler.fullname" . }}-webhook-service + labels: + app.kubernetes.io/component: webhook + app.kubernetes.io/created-by: scheduler-plugins-controller + app.kubernetes.io/part-of: scheduler-plugins-controller + {{- include "scheduler-plugins-as-a-second-scheduler.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.webhookService.type }} + selector: + app: scheduler-plugins-controller + ports: + {{- .Values.webhookService.ports | toYaml | nindent 2 -}} diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml index be1e797..a5a7870 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml @@ -21,7 +21,6 @@ scheduler: controller: name: scheduler-plugins-controller - image: registry.k8s.io/scheduler-plugins/controller:v0.27.8 image: ghcr.io/flux-framework/fluence-controller:latest replicaCount: 1 pullPolicy: IfNotPresent @@ -45,3 +44,12 @@ pluginConfig: # args: # scoringStrategy: # type: MostAllocated # default is LeastAllocated + +enableCertManager: true +kubernetesClusterDomain: cluster.local +webhookService: + ports: + - port: 9443 + protocol: TCP + targetPort: 9443 + type: ClusterIP \ No newline at end of file diff --git a/src/fluence/utils/utils.go b/src/fluence/utils/utils.go index f30eeda..f81f81c 100644 --- a/src/fluence/utils/utils.go +++ b/src/fluence/utils/utils.go @@ -16,6 +16,10 @@ import ( resourcehelper "k8s.io/kubectl/pkg/util/resource" ) +var ( + controlPlaneLabel = "node-role.kubernetes.io/control-plane" +) + // CreateJGF creates the Json Graph Format func CreateJGF(filename string, skipLabel *string) error { ctx := context.Background() @@ -55,12 +59,18 @@ func CreateJGF(filename string, skipLabel *string) error { for node_index, node := range nodes.Items { - // Question from V: what was this for (what is a worker)? - // _, worker := node.Labels["node-role.kubernetes.io/worker"] + // We should not be scheduling to the control plane + _, ok := node.Labels[controlPlaneLabel] + if ok { + fmt.Println("Skipping control plane node ", node.GetName()) + continue + } + // Anything labeled with "skipLabel" meaning it is present, + // should be skipped if *skipLabel != "" { - _, fluxnode := node.Labels[*skipLabel] - if !fluxnode { + _, ok := node.Labels[*skipLabel] + if ok { fmt.Println("Skipping node ", node.GetName()) continue } From 10d624d4e25ee7c26365ecbf0f283de267d8b109 Mon Sep 17 00:00:00 2001 From: vsoch Date: Sat, 17 Feb 2024 19:57:09 -0700 Subject: [PATCH 13/28] webhook: adding support for adding pod group labels Problem: we need every pod object coming into the cluster to be part of a group. Solution: This change adds logic to the mutating webhook to add the labels that indicate the group name and size. We can eventually add flexibility here. I also realize that we can easily watch for job objects first, and add the group size/name to the pod template. This will be much more efficient to then not have to add to the individual pods that are part of a larger job. With this approach I was able to create a fluence scheduled pod, and then see my labels added! It does not do anything beyond that. I am also adding a nice script that makes it easy to build, load, and install fluence freshly, otherwise you will use all your internet data for the month in like, two days. 
Do not do that :P Signed-off-by: vsoch --- README.md | 81 +++------ hack/quick-build.sh | 36 ++++ .../scheduling/v1alpha1/podgroup_webhook.go | 167 +++++++++--------- .../charts/as-a-second-scheduler/values.yaml | 2 +- .../pkg/controllers/podgroup_controller.go | 29 ++- .../pkg/fluence/group/group.go | 10 +- .../pkg/fluence/labels/labels.go | 8 + 7 files changed, 187 insertions(+), 146 deletions(-) create mode 100755 hack/quick-build.sh create mode 100644 sig-scheduler-plugins/pkg/fluence/labels/labels.go diff --git a/README.md b/README.md index 3821ad8..4f33cd9 100644 --- a/README.md +++ b/README.md @@ -21,18 +21,26 @@ For background on the Flux framework and the Fluxion scheduler, you can take a l Fluence is a custom scheduler plugin that you can specify to use with two directive in your pod spec - - Asking for `fluence` as the scheduler name -- Defining a named group of pods with the `fluence.flux-framework.org/pod-group` label. -- Defining the group size with the `fluence.flux-framework.org/group-size` label. +- On either a job or a single or group of pods: + - Defining a named group of pods with the `fluence.flux-framework.org/pod-group` label. + - Defining the group size with the `fluence.flux-framework.org/group-size` label. -If you are using Fluence, these values are required. -An example is shown below for an indexed job, which will create multiple pods. +The way it works: + +1. We have a mutating admission webhook that looks for jobs and pods, and ensures there are fluence labels. +2. A PodGroup reconciler is watching for these same objects. When they are created (this is not done yet): + a. We find the labels and create the pod group object. + b. The pod group object has a timestamp for creation. +3. When the pod is then given to fluence for scheduling, it already has the PodGroup created with name/size and can properly sort. + +Another strategy I'm considering (if the above runs into trouble) is to watch a [channel](https://book-v1.book.kubebuilder.io/beyond_basics/controller_watches). An example is shown below for an indexed job, which will create multiple pods. ```yaml apiVersion: batch/v1 kind: Job metadata: name: fluence-job - annotations: + labels: fluence.flux-framework.org/pod-group: my-pods fluence.flux-framework.org/group-size: 10 spec: @@ -225,17 +233,6 @@ helm install \ fluence as-a-second-scheduler/ ``` -If you load your images into your testing environment and don't need to pull, you can change the pull policy too: - -```bash -helm install \ - --set scheduler.image=vanessa/fluence:latest \ - --set scheduler.sidecarimage=vanessa/fluence-sidecar \ - --set controller.image=vanessa/fluence-controller \ - --set scheduler.sidecarPullPolicy=IfNotPresent \ - fluence as-a-second-scheduler/ -``` - If you need to uninstall (e.g., to redo something): ```bash @@ -433,31 +430,27 @@ make proto #### Workflow -The easiest thing to do is to build the containers in some container namespace that you control (meaning you can push to a registry), e.g.,: +You should first do these on your own: -```bash -make build REGISTRY=ghcr.io/vsoch -``` +1. Create the kind cluster (`kubectl apply -f ./examples/kind-cluster.yaml`) +2. Install the certificate manager. 
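For reference, assuming the kind config shipped in this repository and the cert-manager version used elsewhere in this README, those two steps look something like:

```bash
# Create a multi-node kind cluster (fluence does not schedule to the control plane)
kind create cluster --config ./examples/kind-config.yaml

# Install cert-manager, which provides the webhook serving certificates
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.1/cert-manager.yaml
```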
-If needed, create a "multi node" kind cluster: +I was having trouble developing this easily because it's a lot of steps to build and load containers and change directories and uninstall/install the charts, so I put together a small script that does the following: -```bash -kind create cluster --config ./examples/kind-config.yaml -``` +1. Takes a registry of interest (probably doesn't matter since we are working locally, defaults to `ghcr.io/vsoch` +2. builds all three images, the controller, sidecar, and fluence +3. loads them all into kind +4. changes directory to the charts +5. uninstalls the fluence helm instance (if installed) +6. installs it, targeted the images just built, and setting pullPolicy to never -And then install with your custom images: +The last step ensures we use the images we loaded! You can basically just do: ```bash -cd ./upstream/manifests/install/charts -helm install \ - --set scheduler.image=ghcr.io/vsoch/fluence:latest \ - --set controller.image=ghcr.io/vsoch/fluence-controller:latest \ - --set scheduler.sidecarimage=ghcr.io/vsoch/fluence-sidecar:latest \ - fluence as-a-second-scheduler/ +./hack/quick-build.sh ``` -And then apply what you need to test, and look at logs! -And then keep doing that until you get what you want :) Note that I haven't found a good way for the VSCode developer tools to work because we develop fluence outside of the tree it's supposed to be in. +This sped up my development time immensely. If you want to manually do the steps, see that script for instructions. ##### kubectl plugin @@ -472,26 +465,7 @@ helm install \ fluence as-a-second-scheduler/ ``` -For this setup if you are developing locally with kind, you will need to enable the ingress. Here is `kind-config.yaml` - -```yaml -kind: Cluster -apiVersion: kind.x-k8s.io/v1alpha4 -nodes: -- role: control-plane - kubeadmConfigPatches: - - | - kind: InitConfiguration - nodeRegistration: - kubeletExtraArgs: - node-labels: "ingress-ready=true" - extraPortMappings: - - containerPort: 4242 - hostPort: 4242 - protocol: TCP -``` - -And to create: +For this setup if you are developing locally with kind, you will need to enable the ingress, as is done in [examples/kind-config.yaml](examples/kind-config.yaml). ```bash kind create cluster --config ./kind-config.yaml @@ -500,7 +474,6 @@ kind create cluster --config ./kind-config.yaml #### TODO - Try what [kueue does](https://github.com/kubernetes-sigs/kueue/blob/6d57813a52066dab412735deeeb60ebb0cdb8e8e/cmd/kueue/main.go#L146-L155) to not require cert-manager. - - Possible bug with using kind (with custom config we are scheduling things to the control plane) - need to verify this didn't start happening with mutating webhook addition. #### Vanessa Thinking diff --git a/hack/quick-build.sh b/hack/quick-build.sh new file mode 100755 index 0000000..23a5c87 --- /dev/null +++ b/hack/quick-build.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Before running this, you should: +# 1. create the kind cluster (needs more than one node, fluence does not scheduler to the control plane) +# 2. Install cert-manager +# 3. Customize the script to point to your registry if you intend to push + +REGISTRY="${1:-ghcr.io/vsoch}" +HERE=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$(dirname ${HERE}) + +# Go to the script directory +cd ${ROOT} + +# These build each of the images. 
The sidecar is separate from the other two in src/ +make REGISTRY=${REGISTRY} SCHEDULER_IMAGE=fluence SIDECAR_IMAGE=fluence-sidecar CONTROLLER_IMAGE=fluence-controller + +# This is what it might look like to push +# docker push ghcr.io/vsoch/fluence-sidecar && docker push ghcr.io/vsoch/fluence-controller && docker push ghcr.io/vsoch/fluence:latest + +# We load into kind so we don't need to push/pull and use up internet data ;) +kind load docker-image ${REGISTRY}/fluence-sidecar:latest +kind load docker-image ${REGISTRY}/fluence-controller:latest +kind load docker-image ${REGISTRY}/fluence:latest + +# And then install using the charts. The pull policy ensures we use the loaded ones +cd ${ROOT}/upstream/manifests/install/charts +helm uninstall fluence || true +helm install \ + --set scheduler.image=${REGISTRY}/fluence:latest \ + --set scheduler.sidecarPullPolicy=Never \ + --set scheduler.pullPolicy=Never \ + --set controller.pullPolicy=Never \ + --set controller.image=${REGISTRY}/fluence-controller:latest \ + --set scheduler.sidecarimage=${REGISTRY}/fluence-sidecar:latest \ + fluence as-a-second-scheduler/ diff --git a/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go index 55c4d45..13d327c 100644 --- a/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go +++ b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go @@ -21,6 +21,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + "sigs.k8s.io/scheduler-plugins/pkg/fluence/labels" ) var ( @@ -41,20 +42,23 @@ func NewMutatingWebhook(mgr manager.Manager) *fluenceWatcher { } // mutate-v1-fluence - type fluenceWatcher struct { decoder *admission.Decoder } +// Handle is the main handler for the webhook, which is looking for jobs and pods (in that order) +// If a job comes in (with a pod template) first, we add the labels there first (and they will +// not be added again). func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admission.Response { logger.Info("Running webhook handle") - // First try for job + + // Try for a job first, which would be created before pods job := &batchv1.Job{} err := a.decoder.Decode(req, job) if err != nil { - // Try for a pod next + // Assume we operate on the level of pods for now pod := &corev1.Pod{} err := a.decoder.Decode(req, pod) if err != nil { @@ -63,32 +67,33 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi } // If we get here, we decoded a pod - /*err = a.InjectPod(pod) + err = a.EnsureGroup(pod) if err != nil { - logger.Error("Inject pod error.", err) + logger.Error(err, "Issue adding PodGroup to pod.") return admission.Errored(http.StatusBadRequest, err) - }*/ + } + + logger.Info("Admission pod success.") - // Mutate the fields in pod marshalledPod, err := json.Marshal(pod) if err != nil { logger.Error(err, "Marshalling pod error.") return admission.Errored(http.StatusInternalServerError, err) } - logger.Info("Admission pod success.") + + logger.Info("Admission job success.") return admission.PatchResponseFromRaw(req.Object.Raw, marshalledPod) } - /* - // If we get here, we found a job - err = a.InjectJob(job) - if err != nil { - logger.Error("Inject job error.", err) - return admission.Errored(http.StatusBadRequest, err) - }*/ + // If we get here, err was nil and we have a Job! 
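+	// EnsureGroupOnJob writes the group name and size labels onto the job's pod
+	// template, so pods created later by the job controller already carry them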
+ err = a.EnsureGroupOnJob(job) + if err != nil { + logger.Error(err, "Issue adding PodGroup to job.") + return admission.Errored(http.StatusBadRequest, err) + } + logger.Info("Admission job success.") marshalledJob, err := json.Marshal(job) - if err != nil { logger.Error(err, "Marshalling job error.") return admission.Errored(http.StatusInternalServerError, err) @@ -98,93 +103,89 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi return admission.PatchResponseFromRaw(req.Object.Raw, marshalledJob) } -// Default is the expected entrypoint for a webhook +// Default is the expected entrypoint for a webhook... +// I don't remember if this is even called... func (a *fluenceWatcher) Default(ctx context.Context, obj runtime.Object) error { pod, ok := obj.(*corev1.Pod) if !ok { - job, ok := obj.(*batchv1.Job) - if !ok { - return fmt.Errorf("expected a Pod or Job but got a %T", obj) - } - logger.Info(fmt.Sprintf("Job %s is marked for fluence.", job.Name)) - return nil - // return a.InjectJob(job) + return fmt.Errorf("expected a Pod or Job but got a %T", obj) } logger.Info(fmt.Sprintf("Pod %s is marked for fluence.", pod.Name)) - return nil - //return a.InjectPod(pod) + return a.EnsureGroup(pod) } -// InjectPod adds the sidecar container to a pod -func (a *fluenceWatcher) InjectPod(pod *corev1.Pod) error { +// EnsureGroup adds pod group label and size if not present +// This ensures that every pod passing through is part of a group. +// Note that we need to do similar for Job. +// A pod without a job wrapper, and without metadata is a group +// of size 1. +func (a *fluenceWatcher) EnsureGroup(pod *corev1.Pod) error { - /* - // Cut out early if we have no labels - if pod.Annotations == nil { - logger.Info(fmt.Sprintf("Pod %s is not marked for oras storage.", pod.Name)) - return nil - } - - // Parse oras known labels into settings - settings := orasSettings.NewOrasCacheSettings(pod.Annotations) - - // Cut out early if no oras identifiers! - if !settings.MarkedForOras { - logger.Warnf("Pod %s is not marked for oras storage.", pod.Name) - return nil - } + // Add labels if we don't have anything. Everything is a group! + if pod.Labels == nil { + pod.Labels = map[string]string{} + } - // Validate, return error if no good here. - if !settings.Validate() { - logger.Warnf("Pod %s oras storage did not validate.", pod.Name) - return fmt.Errorf("oras storage was requested but is not valid") - } + // Do we have a group name? + groupName, ok := pod.Labels[labels.PodGroupNameLabel] - // The selector for the namespaced registry is the namespace - if pod.Labels == nil { - pod.Labels = map[string]string{} - } + // If we don't have a fluence group, create one under fluence namespace + if !ok { + groupName = fmt.Sprintf("fluence-group-%s-%s", pod.Namespace, pod.Name) + pod.Labels[labels.PodGroupNameLabel] = groupName + } - // Even pods without say, the launcher, that are marked should have the network added - pod.Labels[defaults.OrasSelectorKey] = pod.ObjectMeta.Namespace - oras.AddSidecar(&pod.Spec, pod.ObjectMeta.Namespace, settings) - logger.Info(fmt.Sprintf("Pod %s is marked for oras storage.", pod.Name))*/ + // Do we have a group size? 
This will be parsed as a string, likely + groupSize, ok := pod.Labels[labels.PodGroupSizeLabel] + if !ok { + groupSize = "1" + pod.Labels[labels.PodGroupSizeLabel] = groupSize + } return nil } -// InjectJob adds the sidecar container to the PodTemplateSpec of the Job -func (a *fluenceWatcher) InjectJob(job *batchv1.Job) error { +// getJobLabel takes a label name and default and returns the value +// We look on both the job and underlying pod spec template +func getJobLabel(job *batchv1.Job, labelName, defaultLabel string) string { - /* - // Cut out early if we have no labels - if job.Annotations == nil { - logger.Info(fmt.Sprintf("Job %s is not marked for oras storage.", job.Name)) - return nil + value, ok := job.Labels[labelName] + if !ok { + value, ok = job.Spec.Template.ObjectMeta.Labels[labelName] + if !ok { + value = defaultLabel } + } + return value +} - // Parse oras known labels into settings - settings := orasSettings.NewOrasCacheSettings(job.Annotations) +// EnsureGroupOnJob looks for fluence labels (size and name) on both the job +// and the pod template. We ultimately put on the pod, the lowest level unit. +// Since we have the size of the job (paramllism) we can use that for the size +func (a *fluenceWatcher) EnsureGroupOnJob(job *batchv1.Job) error { - // Cut out early if no oras identifiers! - if !settings.MarkedForOras { - logger.Warnf("Job %s is not marked for oras storage.", job.Name) - return nil - } + // Be forgiving - allow the person to specify it on the job directly or on the Podtemplate + // We will ultimately put the metadata on the Pod. + if job.Spec.Template.ObjectMeta.Labels == nil { + job.Spec.Template.ObjectMeta.Labels = map[string]string{} + } + if job.Labels == nil { + job.Labels = map[string]string{} + } - // Validate, return error if no good here. 
- if !settings.Validate() { - logger.Warnf("Job %s oras storage did not validate.", job.Name) - return fmt.Errorf("oras storage was requested but is not valid") - } + /// First get the name for the pod group (also setting on the pod template) + defaultName := fmt.Sprintf("fluence-group-%s-%s", job.Namespace, job.Name) + groupName := getJobLabel(job, labels.PodGroupNameLabel, defaultName) - // Add the sidecar to the podspec of the job - if job.Spec.Template.Labels == nil { - job.Spec.Template.Labels = map[string]string{} - } + // Wherever we find it, make sure the pod group name is on the pod spec template + job.Spec.Template.ObjectMeta.Labels[labels.PodGroupNameLabel] = groupName - // Add network to spec template so all pods are targeted - job.Spec.Template.Labels[defaults.OrasSelectorKey] = job.ObjectMeta.Namespace - oras.AddSidecar(&job.Spec.Template.Spec, job.ObjectMeta.Namespace, settings) - logger.Info(fmt.Sprintf("Job %s is marked for oras storage.", job.Name))*/ + // Now do the same for the size, but the size is the size of the job + jobSize := *job.Spec.Parallelism + if jobSize == int32(0) { + jobSize = int32(1) + } + labelSize := fmt.Sprintf("%d", jobSize) + groupSize := getJobLabel(job, labels.PodGroupSizeLabel, labelSize) + job.Spec.Template.ObjectMeta.Labels[labels.PodGroupSizeLabel] = groupSize return nil } diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml index a5a7870..e48aa98 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml @@ -23,7 +23,7 @@ controller: name: scheduler-plugins-controller image: ghcr.io/flux-framework/fluence-controller:latest replicaCount: 1 - pullPolicy: IfNotPresent + pullPolicy: Always # LoadVariationRiskBalancing and TargetLoadPacking are not enabled by default # as they need extra RBAC privileges on metrics.k8s.io. diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go index 02eb4e4..fc8e8d4 100644 --- a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go +++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go @@ -199,11 +199,12 @@ func (r *PodGroupReconciler) SetupWithManager(mgr ctrl.Manager) error { // podToPodGroup is a watcher that looks for pods and associated pod group func (r *PodGroupReconciler) podToPodGroup(ctx context.Context, obj client.Object) []ctrl.Request { + r.log.Info("PANCAKES pre get pod in podToPodGroup flux-framework/fluence-controller") pod, ok := obj.(*v1.Pod) if !ok { return nil } - r.log.Info("podToPodGroup flux-framework/fluence-controller") + r.log.Info("PANCAKES post get pod in podToPodGroup flux-framework/fluence-controller") r.log.V(5).Info("Running podToPodGroup", "pod", pod.Name, "namespace", pod.Namespace) pgName := util.GetPodGroupLabel(pod) if len(pgName) == 0 { @@ -212,6 +213,32 @@ func (r *PodGroupReconciler) podToPodGroup(ctx context.Context, obj client.Objec r.log.V(5).Info("Add pod group when pod gets added", "podGroup", pgName, "pod", pod.Name, "namespace", pod.Namespace) + // TODO we need an ability to trigger a create here. Likely we will just add + // the create function to watches. I'm wondering if we want to set the owner + // to the pod or the job that triggers? 
+ // newPodGroup ensures we have a pod group + /*func newPodGroup(name, namespace string, size int32, pod *v1.Pod) { + + // Create an owner reference to the pod + // https://github.com/kubernetes/apimachinery/blob/master/pkg/apis/meta/v1/types.go#L295 + ownerRef := metav1.OwnerReferences{ + Kind: pod.ObjectMeta.Kind, + Name: pod.Name, + APIVersion: pod.ObjectMeta.APIVersion, + UID: pod.ObjectMeta.UID, + } + pg := PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + OwnerReferences: []metav1.OwnerReferences{ownerRef}, + }, + Spec: PodGroupSpec{ + MinMember: size, + }, + } + }*/ + return []ctrl.Request{{ NamespacedName: types.NamespacedName{ Namespace: pod.Namespace, diff --git a/sig-scheduler-plugins/pkg/fluence/group/group.go b/sig-scheduler-plugins/pkg/fluence/group/group.go index b681504..291ad17 100644 --- a/sig-scheduler-plugins/pkg/fluence/group/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group/group.go @@ -10,11 +10,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework" fcore "sigs.k8s.io/scheduler-plugins/pkg/fluence/core" -) - -const ( - PodGroupNameLabel = "fluence.pod-group" - PodGroupSizeLabel = "fluence.group-size" + "sigs.k8s.io/scheduler-plugins/pkg/fluence/labels" ) // getDefaultGroupName returns a group name based on the pod namespace and name @@ -71,13 +67,13 @@ func DeleteFluenceGroup(pod *v1.Pod) { // getFluenceGroupName looks for the group to indicate a fluence group, and returns it func getFluenceGroupName(pod *v1.Pod) string { - groupName, _ := pod.Labels[PodGroupNameLabel] + groupName, _ := pod.Labels[labels.PodGroupNameLabel] return groupName } // getFluenceGroupSize gets the size of the fluence group func getFluenceGroupSize(pod *v1.Pod) int32 { - size, _ := pod.Labels[PodGroupSizeLabel] + size, _ := pod.Labels[labels.PodGroupSizeLabel] // Default size of 1 if the label is not set (but name is) if size == "" { diff --git a/sig-scheduler-plugins/pkg/fluence/labels/labels.go b/sig-scheduler-plugins/pkg/fluence/labels/labels.go new file mode 100644 index 0000000..e409ddc --- /dev/null +++ b/sig-scheduler-plugins/pkg/fluence/labels/labels.go @@ -0,0 +1,8 @@ +package labels + +// Labels to be shared between different components + +const ( + PodGroupNameLabel = "fluence.pod-group" + PodGroupSizeLabel = "fluence.group-size" +) From 000baac47dd77e36f9cbee455b7509bfa5dfcb02 Mon Sep 17 00:00:00 2001 From: vsoch Date: Sun, 18 Feb 2024 11:40:52 -0700 Subject: [PATCH 14/28] pod-group: labels for name and size now lead to creation Problem: we want the labels (size and name) that are explicitly set to lead to the creation of the pod group so the user does not need to. This is done by way of a watcher on pod, which will trigger after the webhook that ensures that every pod (in a job or single pod) has the proper label. Likely we want to do this for other abstractions that hold pods as well, because it ensures that no matter how the pods go into pending, we have the correct size and name. The only case that a pod can come in without the label means that it was not scheduled by fluence. The user is directed to not do this, but it is not impossible (e.g., fluence sees itself show up here actually). So after this addition we have the full steps to add the labels and create the pod group, and next steps are (finally) to integrate this into fluence (and remove the old abstraction to store it in memory). 
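
For illustration, a pod that reaches the webhook with the labels
scheduling.x-k8s.io/pod-group=my-pods and fluence.group-size=4 should result
in a PodGroup roughly like the sketch below (the name, namespace, and size are
arbitrary examples, and the group version is assumed to be the upstream
scheduling.x-k8s.io/v1alpha1). The reconciler also stamps a creation MicroTime
into status.scheduleStartTime so fluence can later sort groups by age.

    apiVersion: scheduling.x-k8s.io/v1alpha1
    kind: PodGroup
    metadata:
      name: my-pods
      namespace: default
    spec:
      minMember: 4
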
Signed-off-by: vsoch --- Makefile | 3 +- examples/pod-group/lammps/lammps2.yaml | 3 - .../scheduling/v1alpha1/podgroup_webhook.go | 38 ++- .../apis/scheduling/v1alpha1/types.go | 188 ++++++++++++++ .../cmd/controller/app/server.go | 5 + .../mutating-webhook-configuration.yaml | 3 +- .../pkg/controllers/podgroup_controller.go | 229 +++++++++++++----- .../pkg/fluence/group/group.go | 2 +- .../pkg/fluence/labels/labels.go | 6 +- 9 files changed, 399 insertions(+), 78 deletions(-) create mode 100644 sig-scheduler-plugins/apis/scheduling/v1alpha1/types.go diff --git a/Makefile b/Makefile index 8976cb4..6ab44fe 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,6 @@ prepare: clone # These are entirely new directory structures rm -rf $(CLONE_UPSTREAM)/pkg/fluence rm -rf $(CLONE_UPSTREAM)/pkg/controllers/podgroup_controller.go - rm -rf $(CLONE_UPSTREAM)/apis/scheduling/v1alpha1/podgroup_webhook.go rm -rf $(CLONE_UPSTREAM)/cmd/controller/app/server.go cp -R sig-scheduler-plugins/pkg/fluence $(CLONE_UPSTREAM)/pkg/fluence cp -R sig-scheduler-plugins/pkg/controllers/* $(CLONE_UPSTREAM)/pkg/controllers/ @@ -38,7 +37,7 @@ prepare: clone cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/*.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/templates/ cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/crds/*.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/crds/ cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/values.yaml - cp sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go $(CLONE_UPSTREAM)/apis/scheduling/v1alpha1/podgroup_webhook.go + cp sig-scheduler-plugins/apis/scheduling/v1alpha1/*.go $(CLONE_UPSTREAM)/apis/scheduling/v1alpha1/ cp sig-scheduler-plugins/cmd/controller/app/server.go $(CLONE_UPSTREAM)/cmd/controller/app/server.go build: prepare diff --git a/examples/pod-group/lammps/lammps2.yaml b/examples/pod-group/lammps/lammps2.yaml index acdd2d5..5cc7535 100644 --- a/examples/pod-group/lammps/lammps2.yaml +++ b/examples/pod-group/lammps/lammps2.yaml @@ -8,9 +8,6 @@ spec: headlessName: l2 pod: schedulerName: fluence - labels: - fluence.pod-group: lammps2 - fluence.group-size: "2" containers: - image: ghcr.io/converged-computing/metric-lammps:latest@sha256:e24a1ba8954f5a0a7a0bd854cfc5ca7f82ca12607dc6ace38d838591b8deb8ed workingDir: /opt/lammps/examples/reaxff/HNS diff --git a/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go index 13d327c..bc99fe4 100644 --- a/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go +++ b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go @@ -61,9 +61,12 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi // Assume we operate on the level of pods for now pod := &corev1.Pod{} err := a.decoder.Decode(req, pod) + + // Assume it's a pod group or something else. + // We aren't in charge of validating people's pods. 
+ // I don't think we should ever hit this case, actually if err != nil { - logger.Error(err, "Admission error.") - return admission.Errored(http.StatusBadRequest, err) + return admission.Allowed("Found non-pod, non-job, this webhook does not validate beyond those.") } // If we get here, we decoded a pod @@ -73,6 +76,8 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi return admission.Errored(http.StatusBadRequest, err) } + // Send the updated pod to the events channel + //*a.events <- event.GenericEvent{Object: pod} logger.Info("Admission pod success.") marshalledPod, err := json.Marshal(pod) @@ -92,7 +97,10 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi return admission.Errored(http.StatusBadRequest, err) } + // Send the updated job to the events channel + //*a.events <- event.GenericEvent{Object: job} logger.Info("Admission job success.") + marshalledJob, err := json.Marshal(job) if err != nil { logger.Error(err, "Marshalling job error.") @@ -106,12 +114,20 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi // Default is the expected entrypoint for a webhook... // I don't remember if this is even called... func (a *fluenceWatcher) Default(ctx context.Context, obj runtime.Object) error { - pod, ok := obj.(*corev1.Pod) + job, ok := obj.(*batchv1.Job) if !ok { - return fmt.Errorf("expected a Pod or Job but got a %T", obj) + pod, ok := obj.(*corev1.Pod) + + // This is adkin to an admission success - it's not a pod or job, so we don't care + // I don't think we should ever hit this case, actually + if !ok { + return nil + } + logger.Info(fmt.Sprintf("Pod %s is marked for fluence.", pod.Name)) + return a.EnsureGroup(pod) } - logger.Info(fmt.Sprintf("Pod %s is marked for fluence.", pod.Name)) - return a.EnsureGroup(pod) + logger.Info(fmt.Sprintf("Job %s is marked for fluence.", job.Name)) + return a.EnsureGroupOnJob(job) } // EnsureGroup adds pod group label and size if not present @@ -127,12 +143,12 @@ func (a *fluenceWatcher) EnsureGroup(pod *corev1.Pod) error { } // Do we have a group name? - groupName, ok := pod.Labels[labels.PodGroupNameLabel] + groupName, ok := pod.Labels[labels.PodGroupLabel] // If we don't have a fluence group, create one under fluence namespace if !ok { - groupName = fmt.Sprintf("fluence-group-%s-%s", pod.Namespace, pod.Name) - pod.Labels[labels.PodGroupNameLabel] = groupName + groupName = fmt.Sprintf("fluence-group-%s", pod.Name) + pod.Labels[labels.PodGroupLabel] = groupName } // Do we have a group size? 
This will be parsed as a string, likely @@ -174,10 +190,10 @@ func (a *fluenceWatcher) EnsureGroupOnJob(job *batchv1.Job) error { /// First get the name for the pod group (also setting on the pod template) defaultName := fmt.Sprintf("fluence-group-%s-%s", job.Namespace, job.Name) - groupName := getJobLabel(job, labels.PodGroupNameLabel, defaultName) + groupName := getJobLabel(job, labels.PodGroupLabel, defaultName) // Wherever we find it, make sure the pod group name is on the pod spec template - job.Spec.Template.ObjectMeta.Labels[labels.PodGroupNameLabel] = groupName + job.Spec.Template.ObjectMeta.Labels[labels.PodGroupLabel] = groupName // Now do the same for the size, but the size is the size of the job jobSize := *job.Spec.Parallelism diff --git a/sig-scheduler-plugins/apis/scheduling/v1alpha1/types.go b/sig-scheduler-plugins/apis/scheduling/v1alpha1/types.go new file mode 100644 index 0000000..77f10f3 --- /dev/null +++ b/sig-scheduler-plugins/apis/scheduling/v1alpha1/types.go @@ -0,0 +1,188 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/scheduler-plugins/apis/scheduling" +) + +// ElasticQuota sets elastic quota restrictions per namespace +// +genclient +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +kubebuilder:object:root=true +// +kubebuilder:resource:shortName={eq,eqs} +// +kubebuilder:subresource:status +// +kubebuilder:metadata:annotations="api-approved.kubernetes.io=https://github.com/kubernetes-sigs/scheduler-plugins/pull/52" +type ElasticQuota struct { + metav1.TypeMeta `json:",inline"` + + // Standard object's metadata. + // +optional + metav1.ObjectMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"` + + // ElasticQuotaSpec defines the Min and Max for Quota. + // +optional + Spec ElasticQuotaSpec `json:"spec,omitempty" protobuf:"bytes,2,opt,name=spec"` + + // ElasticQuotaStatus defines the observed use. + // +optional + Status ElasticQuotaStatus `json:"status,omitempty" protobuf:"bytes,3,opt,name=status"` +} + +// ElasticQuotaSpec defines the Min and Max for Quota. +type ElasticQuotaSpec struct { + // Min is the set of desired guaranteed limits for each named resource. + // +optional + Min v1.ResourceList `json:"min,omitempty" protobuf:"bytes,1,rep,name=min, casttype=ResourceList,castkey=ResourceName"` + + // Max is the set of desired max limits for each named resource. The usage of max is based on the resource configurations of + // successfully scheduled pods. + // +optional + Max v1.ResourceList `json:"max,omitempty" protobuf:"bytes,2,rep,name=max, casttype=ResourceList,castkey=ResourceName"` +} + +// ElasticQuotaStatus defines the observed use. +type ElasticQuotaStatus struct { + // Used is the current observed total usage of the resource in the namespace. 
+ // +optional + Used v1.ResourceList `json:"used,omitempty" protobuf:"bytes,1,rep,name=used,casttype=ResourceList,castkey=ResourceName"` +} + +// +kubebuilder:object:root=true +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// ElasticQuotaList is a list of ElasticQuota items. +type ElasticQuotaList struct { + metav1.TypeMeta `json:",inline"` + + // Standard list metadata. + // +optional + metav1.ListMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"` + + // Items is a list of ElasticQuota objects. + Items []ElasticQuota `json:"items" protobuf:"bytes,2,rep,name=items"` +} + +// PodGroupPhase is the phase of a pod group at the current time. +type PodGroupPhase string + +// These are the valid phase of podGroups. +const ( + // PodGroupPending means the pod group has been accepted by the system, but scheduler can not allocate + // enough resources to it. + PodGroupPending PodGroupPhase = "Pending" + + // PodGroupRunning means the `spec.minMember` pods of the pod group are in running phase. + PodGroupRunning PodGroupPhase = "Running" + + // PodGroupScheduling means the number of pods scheduled is bigger than `spec.minMember` + // but the number of running pods has not reached the `spec.minMember` pods of PodGroups. + PodGroupScheduling PodGroupPhase = "Scheduling" + + // PodGroupUnknown means a part of `spec.minMember` pods of the pod group have been scheduled but the others can not + // be scheduled due to, e.g. not enough resource; scheduler will wait for related controllers to recover them. + PodGroupUnknown PodGroupPhase = "Unknown" + + // PodGroupFinished means the `spec.minMember` pods of the pod group are successfully finished. + PodGroupFinished PodGroupPhase = "Finished" + + // PodGroupFailed means at least one of `spec.minMember` pods have failed. + PodGroupFailed PodGroupPhase = "Failed" + + // PodGroupLabel is the default label of coscheduling + PodGroupLabel = scheduling.GroupName + "/pod-group" +) + +// PodGroup is a collection of Pod; used for batch workload. +// +genclient +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +kubebuilder:object:root=true +// +kubebuilder:resource:shortName={pg,pgs} +// +kubebuilder:subresource:status +// +kubebuilder:metadata:annotations="api-approved.kubernetes.io=https://github.com/kubernetes-sigs/scheduler-plugins/pull/50" +type PodGroup struct { + metav1.TypeMeta `json:",inline"` + // Standard object's metadata. + // +optional + metav1.ObjectMeta `json:"metadata,omitempty"` + + // Specification of the desired behavior of the pod group. + // +optional + Spec PodGroupSpec `json:"spec,omitempty"` + + // Status represents the current information about a pod group. + // This data may not be up to date. + // +optional + Status PodGroupStatus `json:"status,omitempty"` +} + +// PodGroupSpec represents the template of a pod group. +type PodGroupSpec struct { + // MinMember defines the minimal number of members/tasks to run the pod group; + // if there's not enough resources to start all tasks, the scheduler + // will not start anyone. + MinMember int32 `json:"minMember,omitempty"` + + // MinResources defines the minimal resource of members/tasks to run the pod group; + // if there's not enough resources to start all tasks, the scheduler + // will not start anyone. 
+ MinResources v1.ResourceList `json:"minResources,omitempty"` + + // ScheduleTimeoutSeconds defines the maximal time of members/tasks to wait before run the pod group; + ScheduleTimeoutSeconds *int32 `json:"scheduleTimeoutSeconds,omitempty"` +} + +// PodGroupStatus represents the current state of a pod group. +type PodGroupStatus struct { + // Current phase of PodGroup. + Phase PodGroupPhase `json:"phase,omitempty"` + + // OccupiedBy marks the workload (e.g., deployment, statefulset) UID that occupy the podgroup. + // It is empty if not initialized. + OccupiedBy string `json:"occupiedBy,omitempty"` + + // The number of actively running pods. + // +optional + Running int32 `json:"running,omitempty"` + + // The number of pods which reached phase Succeeded. + // +optional + Succeeded int32 `json:"succeeded,omitempty"` + + // The number of pods which reached phase Failed. + // +optional + Failed int32 `json:"failed,omitempty"` + + // ScheduleStartTime of the group (note that we changed this to a micro time) + // +optional + ScheduleStartTime metav1.MicroTime `json:"scheduleStartTime,omitempty"` +} + +// +kubebuilder:object:root=true + +// PodGroupList is a collection of pod groups. +type PodGroupList struct { + metav1.TypeMeta `json:",inline"` + // Standard list metadata + // +optional + metav1.ListMeta `json:"metadata,omitempty"` + + // Items is the list of PodGroup + Items []PodGroup `json:"items"` +} diff --git a/sig-scheduler-plugins/cmd/controller/app/server.go b/sig-scheduler-plugins/cmd/controller/app/server.go index 5927bec..d42c0f4 100644 --- a/sig-scheduler-plugins/cmd/controller/app/server.go +++ b/sig-scheduler-plugins/cmd/controller/app/server.go @@ -26,6 +26,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/webhook" + api "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" "sigs.k8s.io/scheduler-plugins/pkg/controllers" ) @@ -62,6 +63,10 @@ func Run(s *ServerRunOptions) error { return err } + // Create a channel for the mutating webhook to communicate back to the reconciler + // This way we create the PodGroup before scheduling + //c := make(chan event.GenericEvent) + if err = (&controllers.PodGroupReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml index d6e7330..c639127 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml @@ -36,5 +36,6 @@ webhooks: resources: - pods - jobs - - podgroups +# Can uncomment this if we want to mutate the pod groups after creation +# - podgroups sideEffects: None diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go index fc8e8d4..72bda77 100644 --- a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go +++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go @@ -20,23 +20,25 @@ import ( "context" "fmt" "sort" + "strconv" "strings" "time" "github.com/go-logr/logr" v1 "k8s.io/api/core/v1" apierrs "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" 
"k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" schedv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" + fluenceLabels "sigs.k8s.io/scheduler-plugins/pkg/fluence/labels" "sigs.k8s.io/scheduler-plugins/pkg/util" ) @@ -65,96 +67,197 @@ type PodGroupReconciler struct { // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := log.FromContext(ctx) - log.Info("reconciling flux-framework/fluence-controller") + log.Info("reconciling flux-framework/fluence-controller for request") pg := &schedv1alpha1.PodGroup{} + if err := r.Get(ctx, req.NamespacedName, pg); err != nil { + + // Case 1: if we get here and it's not found, assume not created if apierrs.IsNotFound(err) { - log.V(5).Info("Pod group has been deleted") + log.Info("PodGroup", "Status", fmt.Sprintf("Pod group %s is not found, deleted.", req.NamespacedName)) return ctrl.Result{}, nil } - log.V(3).Error(err, "Unable to retrieve pod group") + log.Error(err, fmt.Sprintf("Unable to retrieve pod group %s", req.NamespacedName)) return ctrl.Result{}, err } - if pg.Status.Phase == schedv1alpha1.PodGroupFinished || - pg.Status.Phase == schedv1alpha1.PodGroupFailed { + // Grab all statuses (and groups of them) we are interested in + schedulingOrPending := (pg.Status.Phase == schedv1alpha1.PodGroupScheduling || pg.Status.Phase == schedv1alpha1.PodGroupPending) + twoDaysOld := pg.Status.ScheduleStartTime.Sub(pg.CreationTimestamp.Time) > 48*time.Hour + finishedOrFailed := pg.Status.Phase == schedv1alpha1.PodGroupFinished || pg.Status.Phase == schedv1alpha1.PodGroupFailed + + // Finished or failed - clean up the group + if finishedOrFailed { + log.Info("PodGroup", "Status", fmt.Sprintf("Pod group %s is finished or failed.", req.NamespacedName)) return ctrl.Result{}, nil } + // If startScheduleTime - createTime > 2days, // do not reconcile again because pod may have been GCed - if (pg.Status.Phase == schedv1alpha1.PodGroupScheduling || pg.Status.Phase == schedv1alpha1.PodGroupPending) && pg.Status.Running == 0 && - pg.Status.ScheduleStartTime.Sub(pg.CreationTimestamp.Time) > 48*time.Hour { - r.recorder.Event(pg, v1.EventTypeWarning, - "Timeout", "schedule time longer than 48 hours") + if schedulingOrPending && pg.Status.Running == 0 && twoDaysOld { + r.recorder.Event(pg, v1.EventTypeWarning, "Timeout", "schedule time longer than 48 hours") return ctrl.Result{}, nil } + // We can get the podList and check for sizes here podList := &v1.PodList{} - if err := r.List(ctx, podList, - client.MatchingLabelsSelector{ - Selector: labels.Set(map[string]string{ - schedv1alpha1.PodGroupLabel: pg.Name}).AsSelector(), - }); err != nil { + + // Select based on the group name + groupNameSelector := labels.Set(map[string]string{schedv1alpha1.PodGroupLabel: pg.Name}).AsSelector() + err := r.List(ctx, podList, client.MatchingLabelsSelector{Selector: groupNameSelector}) + if err != nil { log.Error(err, "List pods for group failed") return ctrl.Result{}, err } - pods := podList.Items + // Inspect the size, set on the group if not done yet + size := len(podList.Items) + log.Info("PodGroup", "Name", pg.Name, "Size", size) + + // When first created, size should be unset (MinMember) + if int(pg.Spec.MinMember) 
== 0 { + log.Info("PodGroup", "Status", fmt.Sprintf("Pod group %s updating size to %d", pg.Name, size)) + return r.updatePodGroupSize(ctx, pg, int32(size)) + + } else if int(pg.Spec.MinMember) != size { + // TODO: Not clear what to do here. Arguably, we also want to check the label size + // because (in the future) we can accept smaller sizes. But then we also need + // to account for if the labels are different, do we take the smallest? + log.Info("PodGroup", "Status", fmt.Sprintf("WARNING: Pod group current MinMember %s does not match %d", pg.Spec.MinMember, size)) + } + + // If we get here, we have a PodGroup with a set size and can inspect / update phase + pods := podList.Items pgCopy := pg.DeepCopy() + switch pgCopy.Status.Phase { case "": pgCopy.Status.Phase = schedv1alpha1.PodGroupPending case schedv1alpha1.PodGroupPending: if len(pods) >= int(pg.Spec.MinMember) { pgCopy.Status.Phase = schedv1alpha1.PodGroupScheduling - fillOccupiedObj(pgCopy, &pods[0]) + + // Always update owner references to be the first pod + // E.g., if a job owns it, ensure the group is deleted with it + updateOwnerReferences(pgCopy, &pods[0]) } default: + + // Get updated counts of running, succeeded, and failed pods pgCopy.Status.Running, pgCopy.Status.Succeeded, pgCopy.Status.Failed = getCurrentPodStats(pods) + + // If for some reason we weren't pending and now have fewer than min required, flip back to pending if len(pods) < int(pg.Spec.MinMember) { pgCopy.Status.Phase = schedv1alpha1.PodGroupPending break } + // A pod with succeeded + running STILL less than the minimum required is scheduling if pgCopy.Status.Succeeded+pgCopy.Status.Running < pg.Spec.MinMember { pgCopy.Status.Phase = schedv1alpha1.PodGroupScheduling } + // A pod with succeeded + running >= the minimum required is running! if pgCopy.Status.Succeeded+pgCopy.Status.Running >= pg.Spec.MinMember { pgCopy.Status.Phase = schedv1alpha1.PodGroupRunning } - // Final state of pod group + + // We have non zero failed, and the total of failed, running amd succeeded > min member + // Final state of pod group is FAILED womp womp if pgCopy.Status.Failed != 0 && pgCopy.Status.Failed+pgCopy.Status.Running+pgCopy.Status.Succeeded >= pg.Spec.MinMember { pgCopy.Status.Phase = schedv1alpha1.PodGroupFailed } + + // Finished! This is where we want to get :) + // TODO: ideally the owning higher level object deletion will delete here, + // but that won't always work for one of pods - need a new strategy if pgCopy.Status.Succeeded >= pg.Spec.MinMember { pgCopy.Status.Phase = schedv1alpha1.PodGroupFinished } } + // TODO need better handling here of cleanup, etc. This mostly handles status changes return r.patchPodGroup(ctx, pg, pgCopy) } +// newPodGroup creates a new podGroup object, capturing the creation time +// This should be followed by a request to reconsile it +func (r *PodGroupReconciler) newPodGroup( + ctx context.Context, + name, namespace string, + groupSize int32, +) (*schedv1alpha1.PodGroup, error) { + + pg := &schedv1alpha1.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + // Note that we don't know the size yet + // The most important thing here is the MicroTime! + Spec: schedv1alpha1.PodGroupSpec{ + MinMember: groupSize, + }, + Status: schedv1alpha1.PodGroupStatus{ + ScheduleStartTime: metav1.NewMicroTime(time.Now()), + }, + } + // TODO need to set a controller reference? 
+ // ctrl.SetControllerReference(cluster, job, r.Scheme) + err := r.Create(ctx, pg) + if err != nil { + r.log.Error(err, "Failed to create new PodGroup", "Namespace:", pg.Namespace, "Name:", pg.Name) + return pg, err + } + // Successful - return and requeue + return pg, nil + +} + +// patchPodGroup is a halper function to run a patch and then return the correct result / error for the reconciler func (r *PodGroupReconciler) patchPodGroup(ctx context.Context, old, new *schedv1alpha1.PodGroup) (ctrl.Result, error) { patch := client.MergeFrom(old) if err := r.Status().Patch(ctx, new, patch); err != nil { + r.log.Error(err, "Issue patching PodGroup", "Namespace:", old.Namespace, "Name:", old.Name) return ctrl.Result{}, err } err := r.Patch(ctx, new, patch) + if err != nil { + r.log.Error(err, "Issue patching PodGroup", "Namespace:", old.Namespace, "Name:", old.Name) + } return ctrl.Result{}, err } +// updatePodGroup does an update with reconcile instead of a patch request +func (r *PodGroupReconciler) updatePodGroupSize( + ctx context.Context, + old *schedv1alpha1.PodGroup, + size int32, +) (ctrl.Result, error) { + + patch := client.MergeFrom(old.DeepCopy()) + old.Spec.MinMember = size + + // Apply the patch to update the size + r.Status().Update(ctx, old) + err := r.Patch(ctx, old, patch) + return ctrl.Result{Requeue: true}, err +} + +// getCurrentPodStats gets the number of running, succeeded, and failed +// We use these to populate the PodGroup func getCurrentPodStats(pods []v1.Pod) (int32, int32, int32) { if len(pods) == 0 { return 0, 0, 0 } - var ( running int32 = 0 succeeded int32 = 0 failed int32 = 0 ) + + // Loop and count things. for _, pod := range pods { switch pod.Status.Phase { case v1.PodRunning: @@ -168,7 +271,11 @@ func getCurrentPodStats(pods []v1.Pod) (int32, int32, int32) { return running, succeeded, failed } -func fillOccupiedObj(pg *schedv1alpha1.PodGroup, pod *v1.Pod) { +// updateOwnerReferences ensures the group is always owned by the same entity that owns the pod +// This ensures that, for example, a job that is wrapping pods is the owner. +func updateOwnerReferences(pg *schedv1alpha1.PodGroup, pod *v1.Pod) { + + // Case 1: The pod itself doesn't have owner references. YOLO if len(pod.OwnerReferences) == 0 { return } @@ -184,64 +291,68 @@ func fillOccupiedObj(pg *schedv1alpha1.PodGroup, pod *v1.Pod) { } // SetupWithManager sets up the controller with the Manager. +// We watch the events channel, which is going to trigger from the mutating webhook +// to send over when a pod group is created (hopefully preceeding schedule). func (r *PodGroupReconciler) SetupWithManager(mgr ctrl.Manager) error { r.recorder = mgr.GetEventRecorderFor("PodGroupController") r.log = mgr.GetLogger() r.log.Info("setup with manager flux-framework/fluence-controller") return ctrl.NewControllerManagedBy(mgr). - Watches(&v1.Pod{}, handler.EnqueueRequestsFromMapFunc(r.podToPodGroup)). + Watches(&v1.Pod{}, handler.EnqueueRequestsFromMapFunc(r.ensurePodGroup)). For(&schedv1alpha1.PodGroup{}). WithOptions(controller.Options{MaxConcurrentReconciles: r.Workers}). 
Complete(r) } -// podToPodGroup is a watcher that looks for pods and associated pod group -func (r *PodGroupReconciler) podToPodGroup(ctx context.Context, obj client.Object) []ctrl.Request { - - r.log.Info("PANCAKES pre get pod in podToPodGroup flux-framework/fluence-controller") +func (r *PodGroupReconciler) ensurePodGroup(ctx context.Context, obj client.Object) []ctrl.Request { pod, ok := obj.(*v1.Pod) if !ok { return nil } - r.log.Info("PANCAKES post get pod in podToPodGroup flux-framework/fluence-controller") - r.log.V(5).Info("Running podToPodGroup", "pod", pod.Name, "namespace", pod.Namespace) - pgName := util.GetPodGroupLabel(pod) - if len(pgName) == 0 { + groupName := util.GetPodGroupLabel(pod) + + // This case only happens when something is not scheduled by fluence + if len(groupName) == 0 { + r.log.Info("Pod: ", "Name", pod.Name, "Status", pod.Status.Phase, "Action", "Not fluence owned") return nil } - r.log.V(5).Info("Add pod group when pod gets added", "podGroup", pgName, "pod", pod.Name, "namespace", pod.Namespace) + // If we are watching the Pod and it's beyond pending, we hopefully already made a group + // and that group should be in the reconcile process. + if pod.Status.Phase != v1.PodPending { + r.log.Info("Pod: ", "Name", pod.Name, "Status", pod.Status.Phase, "Action", "Skipping reconcile") + return nil + } - // TODO we need an ability to trigger a create here. Likely we will just add - // the create function to watches. I'm wondering if we want to set the owner - // to the pod or the job that triggers? - // newPodGroup ensures we have a pod group - /*func newPodGroup(name, namespace string, size int32, pod *v1.Pod) { + // At this point we should have a group size (string) set by the webhook + rawSize := pod.Labels[fluenceLabels.PodGroupSizeLabel] + groupSize, err := strconv.ParseInt(rawSize, 10, 32) + if err != nil { + r.log.Error(err, "Parsing PodGroup size.") + return nil + } - // Create an owner reference to the pod - // https://github.com/kubernetes/apimachinery/blob/master/pkg/apis/meta/v1/types.go#L295 - ownerRef := metav1.OwnerReferences{ - Kind: pod.ObjectMeta.Kind, - Name: pod.Name, - APIVersion: pod.ObjectMeta.APIVersion, - UID: pod.ObjectMeta.UID, - } - pg := PodGroup{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - OwnerReferences: []metav1.OwnerReferences{ownerRef}, - }, - Spec: PodGroupSpec{ - MinMember: size, - }, - } - }*/ + namespacedName := types.NamespacedName{ + Namespace: pod.Namespace, + Name: groupName, + } - return []ctrl.Request{{ - NamespacedName: types.NamespacedName{ - Namespace: pod.Namespace, - Name: pgName, - }}} + // Create the pod group if the pod is pending + pg := &schedv1alpha1.PodGroup{} + if err := r.Get(ctx, namespacedName, pg); err != nil { + + // Case 1: if we get here and it's not found, assume not created + if apierrs.IsNotFound(err) { + r.log.Info("Pod: ", "Status", pod.Status.Phase, "Name", pod.Name, "Group", groupName, "Namespace", pod.Namespace, "Action", "Creating PodGroup") + + // TODO should an owner be set here? Setting to a specific pod seems risky/wrong in case deleted. 
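+			// Create the group now, with the parsed size and its creation time,
+			// so it already exists by the time fluence tries to sort and schedule the pod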
+ err, _ := r.newPodGroup(ctx, groupName, pod.Namespace, int32(groupSize)) + if err != nil { + return []ctrl.Request{{NamespacedName: namespacedName}} + } + r.log.Info("Pod: ", "Status", pod.Status.Phase, "Name", pod.Name, "Group", groupName, "Namespace", pod.Namespace, "Action", "Issue Creating PodGroup") + } + } + return nil } diff --git a/sig-scheduler-plugins/pkg/fluence/group/group.go b/sig-scheduler-plugins/pkg/fluence/group/group.go index 291ad17..4af84e2 100644 --- a/sig-scheduler-plugins/pkg/fluence/group/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group/group.go @@ -67,7 +67,7 @@ func DeleteFluenceGroup(pod *v1.Pod) { // getFluenceGroupName looks for the group to indicate a fluence group, and returns it func getFluenceGroupName(pod *v1.Pod) string { - groupName, _ := pod.Labels[labels.PodGroupNameLabel] + groupName, _ := pod.Labels[labels.PodGroupLabel] return groupName } diff --git a/sig-scheduler-plugins/pkg/fluence/labels/labels.go b/sig-scheduler-plugins/pkg/fluence/labels/labels.go index e409ddc..e0040ea 100644 --- a/sig-scheduler-plugins/pkg/fluence/labels/labels.go +++ b/sig-scheduler-plugins/pkg/fluence/labels/labels.go @@ -3,6 +3,10 @@ package labels // Labels to be shared between different components const ( - PodGroupNameLabel = "fluence.pod-group" + // https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/apis/scheduling/v1alpha1/types.go#L109 + PodGroupLabel = "scheduling.x-k8s.io/pod-group" + + // TODO add more labels here, to be discovered used later + //PodGroupNameLabel = "fluence.pod-group" PodGroupSizeLabel = "fluence.group-size" ) From 7874d571601839ff8b7bc257b61e8211d22f60e2 Mon Sep 17 00:00:00 2001 From: vsoch Date: Sun, 18 Feb 2024 16:09:01 -0700 Subject: [PATCH 15/28] fluence: refactor to use new PodGroup Problem: fluence should only be storing state of jobid and presence of a group name in a map to indicate node assignment. Soluion: update the code here. Note that this is not working yet, and I am pushing / opening the PR to not use the work (and will update accordingly, and using this PR to test). Signed-off-by: vsoch --- README.md | 198 ++++++------ docs/README.md | 25 ++ .../simple_example/fluence-scheduler-pod.yaml | 4 +- .../pkg/fluence/core/core.go | 202 +++++------- sig-scheduler-plugins/pkg/fluence/events.go | 150 +++++++++ sig-scheduler-plugins/pkg/fluence/fluence.go | 295 ++++++------------ .../pkg/fluence/group/group.go | 96 +----- .../pkg/fluence/utils/utils.go | 14 +- src/fluence/fluxion/fluxion.go | 2 +- 9 files changed, 466 insertions(+), 520 deletions(-) create mode 100644 docs/README.md create mode 100644 sig-scheduler-plugins/pkg/fluence/events.go diff --git a/README.md b/README.md index 4f33cd9..8556dd1 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,11 @@ Fluence enables HPC-grade pod scheduling in Kubernetes via the [Kubernetes Sched ## Getting started For instructions on how to start Fluence on a K8s cluster, see [examples](examples/). Documentation and instructions for reproducing our CANOPIE-2022 paper (citation below) can be found in the [canopie22-artifacts branch](https://github.com/flux-framework/flux-k8s/tree/canopie22-artifacts). -For background on the Flux framework and the Fluxion scheduler, you can take a look at our award-winning R&D100 submission: https://ipo.llnl.gov/sites/default/files/2022-02/Flux_RD100_Final.pdf. 
For next steps: +For background on the Flux framework and the Fluxion scheduler, you can take a look at our award-winning [R&D100 submission](https://ipo.llnl.gov/sites/default/files/2022-02/Flux_RD100_Final.pdf). For next steps: - To understand how it works, see [Design](#design) - To deploy our pre-built images, go to [Deploy](#deploy) - - To build your own images, go to [Setup](#setup) + - To build your own images, go to [Build](#build) - To learn about repository organization, see [Developer](#developer) ### Design @@ -21,19 +21,47 @@ For background on the Flux framework and the Fluxion scheduler, you can take a l Fluence is a custom scheduler plugin that you can specify to use with two directive in your pod spec - - Asking for `fluence` as the scheduler name -- On either a job or a single or group of pods: - - Defining a named group of pods with the `fluence.flux-framework.org/pod-group` label. - - Defining the group size with the `fluence.flux-framework.org/group-size` label. + +Note that any abstraction with pods (or a single pod) marked for fluence will automatically have the group name +and nodes derived. However, if you want to customize this metadata (for example, define the size of the pod group explicitly you can use +the following labels): + + - A named group of pods with the `scheduling.x-k8s.io/pod-group` label. + - Defining the group size with the `fluence.group-size` label. + +We expect to define more labels to customize the scheduling logic. The way it works: -1. We have a mutating admission webhook that looks for jobs and pods, and ensures there are fluence labels. -2. A PodGroup reconciler is watching for these same objects. When they are created (this is not done yet): +1. We have a mutating admission webhook that looks for jobs and pods, and ensures there are fluence labels (likely we will add more abstractions). +2. A PodGroup reconciler is watching for these same objects. When they are created: a. We find the labels and create the pod group object. - b. The pod group object has a timestamp for creation. + b. The pod group object has a timestamp for creation in milliseconds. 3. When the pod is then given to fluence for scheduling, it already has the PodGroup created with name/size and can properly sort. -Another strategy I'm considering (if the above runs into trouble) is to watch a [channel](https://book-v1.book.kubebuilder.io/beyond_basics/controller_watches). An example is shown below for an indexed job, which will create multiple pods. 
+Here is an example of a Job intended for Fluence: + +```yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: fluence-job +spec: + completions: 10 + parallelism: 10 + completionMode: Indexed + template: + spec: + schedulerName: fluence + containers: + - name: fluence-job + image: busybox + command: [echo, potato] + restartPolicy: Never + backoffLimit: 4 +``` + +And you can imagine if you want to group pods from different abstractions together, or declare a different size than what is represented in the Job: ```yaml apiVersion: batch/v1 @@ -41,8 +69,8 @@ kind: Job metadata: name: fluence-job labels: - fluence.flux-framework.org/pod-group: my-pods - fluence.flux-framework.org/group-size: 10 + scheduling.x-k8s.io/pod-group: min-size-group + fluence.group-size: 5 spec: completions: 10 parallelism: 10 @@ -58,8 +86,7 @@ spec: backoffLimit: 4 ``` -The group size might be different than, for example, your higher level abstraction (e.g., the IndexedJob) as there is no reason -pods with different names cannot be part of the same group that needs to be scheduled together. +There is no reason pods with different names or under different abstractions cannot be part of the same group that needs to be scheduled together. ### Deploy @@ -88,7 +115,7 @@ helm install \ And that's it! See the [testing install](#testing-install) section for a basic example to schedule pods using Fluence. -### Setup +### Build To build and test Fluence, you will need: @@ -96,9 +123,7 @@ To build and test Fluence, you will need: - [helm](https://helm.sh/docs/intro/install/) to install charts for scheduler plugins. - A Kubernetes cluster for testing, e.g., you can deploy one with [kind](https://kind.sigs.k8s.io/docs/user/quick-start/) -### Building Fluence - -There are two images we will be building: +There are three images we will be building: - the scheduler sidecar: built from the repository here - the scheduler: built (and modified) from [this branch of scheduler-plugins](https://github.com/openshift-psap/scheduler-plugins/blob/fluence/build/scheduler/Dockerfile) @@ -111,7 +136,7 @@ There are two images we will be building: This will run the full builds for all containers in one step, which includes: 1. Building the fluence sidecar from source code in [src](src) -2. Cloning the upstream kubernetes-sigs/plugin-schedulers respository to ./upstream +2. Cloning the upstream kubernetes-sigs/plugin-schedulers repository to ./upstream 3. Building the scheduler and controller containers From the root here: @@ -128,26 +153,18 @@ make REGISTRY=vanessa SCHEDULER_IMAGE=fluence SIDECAR_IMAGE=fluence-sidecar CONT As an alternative, you can look at the Makefile to do each of the steps separately. - -Whatever build approach you use, you'll want to push to your registry for later discovery! - -```bash -docker push docker.io/vanessa/fluence -docker push docker.io/vanessa/fluence-sidecar -docker push docker.io/vanessa/fluence-controller -``` - -### Prepare Cluster +#### Prepare Cluster > Prepare a cluster and install the Kubernetes scheduling plugins framework -These steps will require a Kubernetes cluster to install to, and having pushed the plugin container to a registry. If you aren't using a cloud provider, you can create a local one with `kind`: +These steps will require a Kubernetes cluster to install to, and having pushed the plugin container to a registry OR loading +them into the local cluster and setting the image pull policy to `Never`. 
If you aren't using a cloud provider, you can create a local one with `kind`: ```bash kind create cluster --config ./examples/kind-config.yaml ``` -And install the certificate manager: +And again install the certificate manager: ```bash kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.1/cert-manager.yaml @@ -155,7 +172,7 @@ kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/ **Important** if you are developing or testing fluence, note that custom scheduler plugins don't seem to work out of the box with MiniKube (but everything works with kind). Likely there are extensions or similar that need to be configured with MiniKube (that we have not looked into). -### Install Fluence +#### Install Fluence For some background, the [Scheduling Framework](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/) provided by Kubernetes means that our container is going to provide specific endpoints to allow for custom scheduling. At this point you can follow the instructions @@ -184,19 +201,26 @@ helm show values as-a-second-scheduler/ scheduler: name: fluence - image: registry.k8s.io/scheduler-plugins/kube-scheduler:v0.27.8 + image: ghcr.io/flux-framework/fluence:latest replicaCount: 1 leaderElect: false sidecarimage: ghcr.io/flux-framework/fluence-sidecar:latest policy: lonode pullPolicy: Always sidecarPullPolicy: Always + loggingLevel: "9" + + # Port is for GRPC, and enabling the external service will also + # create the service and ingress to it, along with adding + # additional API endpoints for our TBA kubectl plugin + enableExternalService: false + port: 4242 controller: name: scheduler-plugins-controller image: ghcr.io/flux-framework/fluence-controller:latest replicaCount: 1 - pullPolicy: IfNotPresent + pullPolicy: Always # LoadVariationRiskBalancing and TargetLoadPacking are not enabled by default # as they need extra RBAC privileges on metrics.k8s.io. @@ -217,6 +241,15 @@ pluginConfig: # args: # scoringStrategy: # type: MostAllocated # default is LeastAllocated + +enableCertManager: true +kubernetesClusterDomain: cluster.local +webhookService: + ports: + - port: 9443 + protocol: TCP + targetPort: 9443 + type: ClusterIP ``` @@ -239,9 +272,15 @@ If you need to uninstall (e.g., to redo something): helm uninstall fluence ``` +Or see the name you used: + +```bash +helm list +``` + Next you can move down to testing the install. -### Testing Install +#### Testing Install The installation process will run one scheduler and one controller pod for the Scheduler Plugin Framework in the default namespace. You can double check that everything is running as follows: @@ -284,35 +323,40 @@ kubectl logs fluence-6bbcbc6bbf-xjfx6 -c scheduler-plugins-scheduler If you haven't done anything, you'll likely just see health checks. -### Deploy Pods +#### Testing Pods and Jobs -Let's now run a simple example! Change directory into this directory: +You can test deploying pods and jobs. ```bash -# This is from the root of flux-k8s -cd examples/simple_example +kubectl apply -f examples/simple_example/fluence-scheduler-pod.yaml ``` +or a job: -And then we want to deploy two pods, one assigned to the `default-scheduler` and the other -`fluence`. 
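If you want to double check that fluence (and not the default scheduler) placed the pods, a quick sanity check along these lines should work (the job name assumes the sized-job example in this repository; adjust to whatever you applied):

```bash
# Show which node each Job pod landed on (Kubernetes adds the job-name label to Job pods)
kubectl get pods -l job-name=fluence-sized-job -o wide

# The Scheduled events should reference fluence rather than default-scheduler
kubectl get events -o wide | grep Scheduled
```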
For FYI, we do this via setting `schedulerName` in the spec: +```bash +# size 3 +kubectl apply -f examples/test_example/fluence-sized-job.yaml + +# size 1 +kubectl apply -f examples/test_example/fluence-job.yaml +``` + +Note that all of these have (in their spec) a designation of the fluence scheduler. ```yaml spec: schedulerName: fluence ``` -Here is how to create the pods: +Once it was created, aside from checking that it ran OK, you can verify by looking at the scheduler logs again: ```bash -kubectl apply -f default-scheduler-pod.yaml -kubectl apply -f fluence-scheduler-pod.yaml +kubectl logs fluence-6bbcbc6bbf-xjfx6 ``` -Once it was created, aside from checking that it ran OK, I could verify by looking at the scheduler logs again: +
+ +Scheduler Logs -```bash -kubectl logs fluence-6bbcbc6bbf-xjfx6 -``` ```bash Defaulted container "sidecar" out of: sidecar, scheduler-plugins-scheduler This is the fluxion grpc server @@ -361,6 +405,8 @@ FINAL NODE RESULT: [GRPCServer] Response podID:"fluence-scheduled-pod" nodelist:{nodeID:"kind-control-plane" tasks:1} jobID:1 ``` +
+ I was trying to look for a way to see the assignment, and maybe we can see it here (this is the best I could come up with!) ```bash @@ -385,7 +431,6 @@ pod/fluence-scheduled-pod spec.containers{fluence-scheduled-container} kubelet For the above, I found [this page](https://kubernetes.io/docs/tasks/extend-kubernetes/configure-multiple-schedulers/#enable-leader-election) very helpful. -Finally, note that we also have a more appropriate example with jobs under [examples/test_example](examples/test_example). It's slightly more sane because it uses Job, and jobs are expected to complete (whereas pods are not and will get into crash loop backoffs, etc). For example of how to programmatically interact with the job pods and check states, events, see the [test.sh](.github/test.sh) script. ### Developer @@ -397,9 +442,10 @@ If you are looking to develop: - [src](src): includes source code for fluence. You'll find logs for this code in the `sidecar` container of the fluence pod. - [sig-scheduler-plugins](sig-scheduler-plugins): includes assets (manifests and Go files) that are intended to be added to the kubernetes-sigs/scheduler-plugins upstream repository before build. You'll find logs for this container in the `scheduler-plugins-scheduler` container of the pod. + - [apis](sig-scheduler-plugins/apis): customized PodGroup to define the status scheduled time in micro seconds - [manifests](sig-scheduler-plugins/manifests): manifests for helm and Kubernetes - [pkg](sig-scheduler-plugins/pkg): the main fluence module to add to upstream - - [cmd](sig-scheduler-plugins/cmd): the main.go to replace in upstream + - [cmd](sig-scheduler-plugins/cmd): the main.go to replace in upstream - *upstream*: the default name this upstream is cloned to when you do a make build command. Note that the clone of the repository and copying of files to the correct locations is all automated through the [Makefile](Makefile). Additional commands provided include the following: @@ -447,7 +493,7 @@ I was having trouble developing this easily because it's a lot of steps to build The last step ensures we use the images we loaded! You can basically just do: ```bash -./hack/quick-build.sh +/bin/bash ./hack/quick-build.sh ``` This sped up my development time immensely. If you want to manually do the steps, see that script for instructions. @@ -474,60 +520,18 @@ kind create cluster --config ./kind-config.yaml #### TODO - Try what [kueue does](https://github.com/kubernetes-sigs/kueue/blob/6d57813a52066dab412735deeeb60ebb0cdb8e8e/cmd/kueue/main.go#L146-L155) to not require cert-manager. - -#### Vanessa Thinking - -> Updated February 15, 2024 - -What I think might be happening (and not always, sometimes) - -- New pod group, no node list -- Fluence assigns nodes -- Nodes get assigned to pods 1:1 -- POD group is deleted -- Some pod is sent back to queue (kubelet rejects, etc) -- POD group does not exist and is recreated, no node list -- Fluence asks again, but still has the first job. Not enough resources, asks forever. - -The above would not happen with the persistent pod group (if it wasn't cleaned up until the deletion of the job) and wouldn't happen if there are just enough resources to account for the overlap. - -- Does Fluence allocate resources for itself? -- It would be nice to be able to inspect the state of Fluence. 
-- At some point we want to be using the TBA fluxion-go instead of the one off branch we currently have (but we don't need to be blocked for that) -- We should (I think) restore pod group (it's in the controller here) and have our own container built. That way we have total control over the custom resource, and we don't risk it going away. - - As a part of that, we can add add a mutating webhook that emulates what we are doing in fluence now to find the label, but instead we will create the CRD to hold state instead of trying to hold in the operator. -- It could then also be investigated that we can more flexibly change the size of the group, within some min/max size (also determined by labels?) to help with scheduling. -- Note that kueue has added a Pod Group object, so probably addresses the static case here. + - Add other abstraction types to be intercepted (and labeled with sizes) #### Components - [FluxStateData](sig-scheduler-plugins/pkg/fluence/core/core.go): is given to the [framework.CycleState](https://github.com/kubernetes/kubernetes/blob/242b41b36a20032f99e8a059ca0a5d764105217b/pkg/scheduler/framework/cycle_state.go#L48) and serves as a vehicle to store a cache of node name assignment. -#### Helm - -The install commands are shown above, but often you want to uninstall! - -> What is the name of the installed plugin again? - -```bash - helm list -NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION -fluence default 1 2024-01-08 12:04:58.558612156 -0700 MST deployed scheduler-plugins-0.27.80.27.8 -``` - -And then uninstall: - -```bash -$ helm uninstall fluence -release "fluence" uninstalled -``` - - ## Papers You can find details of Fluence architecture, implementation, experiments, and improvements to the Kubeflow MPI operator in our collaboration's papers: -``` + +```bibtex @INPROCEEDINGS{10029991, author={Milroy, Daniel J. and Misale, Claudia and Georgakoudis, Giorgis and Elengikal, Tonia and Sarkar, Abhik and Drocco, Maurizio and Patki, Tapasya and Yeom, Jae-Seung and Gutierrez, Carlos Eduardo Arango and Ahn, Dong H. and Park, Yoonho}, booktitle={2022 IEEE/ACM 4th International Workshop on Containers and New Orchestration Paradigms for Isolated Environments in HPC (CANOPIE-HPC)}, @@ -539,7 +543,7 @@ You can find details of Fluence architecture, implementation, experiments, and i doi={10.1109/CANOPIE-HPC56864.2022.00011} } ``` -``` +```bibtex @INPROCEEDINGS{9652595, author={Misale, Claudia and Drocco, Maurizio and Milroy, Daniel J. and Gutierrez, Carlos Eduardo Arango and Herbein, Stephen and Ahn, Dong H. and Park, Yoonho}, booktitle={2021 3rd International Workshop on Containers and New Orchestration Paradigms for Isolated Environments in HPC (CANOPIE-HPC)}, @@ -551,7 +555,7 @@ You can find details of Fluence architecture, implementation, experiments, and i doi={10.1109/CANOPIEHPC54579.2021.00006} } ``` -``` +```bibtex @inproceedings{10.1007/978-3-030-96498-6_18, address = {Cham}, author = {Misale, Claudia and Milroy, Daniel J. and Gutierrez, Carlos Eduardo Arango and Drocco, Maurizio and Herbein, Stephen and Ahn, Dong H. 
and Kaiser, Zvonko and Park, Yoonho}, diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..155ffc8 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,25 @@ +# Development Notes + +## Thinking + +> Updated February 15, 2024 + +What I think might be happening (and not always, sometimes) + +- New pod group, no node list +- Fluence assigns nodes +- Nodes get assigned to pods 1:1 +- POD group is deleted +- Some pod is sent back to queue (kubelet rejects, etc) +- POD group does not exist and is recreated, no node list +- Fluence asks again, but still has the first job. Not enough resources, asks forever. + +The above would not happen with the persistent pod group (if it wasn't cleaned up until the deletion of the job) and wouldn't happen if there are just enough resources to account for the overlap. + +- Does Fluence allocate resources for itself? +- It would be nice to be able to inspect the state of Fluence. +- At some point we want to be using the TBA fluxion-go instead of the one off branch we currently have (but we don't need to be blocked for that) +- We should (I think) restore pod group (it's in the controller here) and have our own container built. That way we have total control over the custom resource, and we don't risk it going away. + - As a part of that, we can add add a mutating webhook that emulates what we are doing in fluence now to find the label, but instead we will create the CRD to hold state instead of trying to hold in the operator. +- It could then also be investigated that we can more flexibly change the size of the group, within some min/max size (also determined by labels?) to help with scheduling. +- Note that kueue has added a Pod Group object, so probably addresses the static case here. diff --git a/examples/simple_example/fluence-scheduler-pod.yaml b/examples/simple_example/fluence-scheduler-pod.yaml index 71a8463..b09c714 100644 --- a/examples/simple_example/fluence-scheduler-pod.yaml +++ b/examples/simple_example/fluence-scheduler-pod.yaml @@ -1,11 +1,11 @@ apiVersion: v1 kind: Pod metadata: - name: fluence-scheduled-pod-1 + name: fluence-scheduled-pod labels: name: scheduler-example spec: schedulerName: fluence containers: - name: fluence-scheduled-container - image: registry.k8s.io/pause:2.0 \ No newline at end of file + image: registry.k8s.io/pause:2.0 diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index 53a627e..a3f4531 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -3,10 +3,7 @@ package core import ( "fmt" - v1 "k8s.io/api/core/v1" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/klog/v2" + klog "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" @@ -26,13 +23,9 @@ func (s *FluxStateData) Clone() framework.StateData { return &FluxStateData{NodeCache: s.NodeCache} } -// NewFluxState creates an entry for the CycleState with the minimum that we might need -func NewFluxState(nodeName string, groupName string, size int32) *FluxStateData { - cache := NodeCache{ - NodeName: nodeName, - GroupName: groupName, - MinGroupSize: size, - } +// NewFluxState creates an entry for the CycleState with the node and group name +func NewFluxState(nodeName string, groupName string) *FluxStateData { + cache := NodeCache{NodeName: nodeName} return &FluxStateData{NodeCache: cache} } @@ -42,162 +35,127 @@ func NewFluxState(nodeName string, groupName 
string, size int32) *FluxStateData type NodeCache struct { NodeName string - // This is derived from tasks, where - // task is an allocation to some node - // High level it is most often akin to the - // number of pods on the node. I'm not sure that I understand this - // https://github.com/flux-framework/flux-k8s/blob/9f24f36752e3cced1b1112d93bfa366fb58b3c84/src/fluence/fluxion/fluxion.go#L94-L97 - // How does that relate to a single pod? It is called "Count" in other places - Tasks int + // Tie assignment back to PodGroup, which can be used to get size and time created + GroupName string - // These fields are primarily for the FluxStateData - // Without a PodGroup CRD we keep min size here - MinGroupSize int32 - GroupName string + // Assigned tasks (often pods) to nodes + // https://github.com/flux-framework/flux-k8s/blob/9f24f36752e3cced1b1112d93bfa366fb58b3c84/src/fluence/fluxion/fluxion.go#L94-L97 + AssignedTasks int } // A pod group cache holds a list of nodes for an allocation, where each has some number of tasks // along with the expected group size. This is intended to replace PodGroup // given the group name, size (derived from annotations) and timestamp type PodGroupCache struct { + GroupName string // This is a cache of nodes for pods Nodes []NodeCache - Size int32 - Name string - - // Keep track of when the group was initially created! - // This is like, the main thing we need. - TimeCreated metav1.MicroTime } -// Memory cache of pod group name to pod group cache, above -var podGroupCache map[string]*PodGroupCache +// PodGroups seen by fluence +var groupsSeen map[string]*PodGroupCache -// Init populates the podGroupCache +// Init populates the groupsSeen cache func Init() { - podGroupCache = map[string]*PodGroupCache{} -} - -// RegisterPodGroup ensures that the PodGroup exists in the cache -// This is an experimental replacement for an actual PodGroup -// We take a timestampo, which if called from Less (during sorting) is tiem.Time -// if called later (an individual pod) we go for its creation timestamp -func RegisterPodGroup(pod *v1.Pod, groupName string, groupSize int32) error { - entry, ok := podGroupCache[groupName] - - if !ok { - - // Assume we create the group with the timestamp - // of the first pod seen. There might be imperfections - // by the second, but as long as we sort them via millisecond - // this should prevent interleaving - nodes := []NodeCache{} - - // Create the new entry for the pod group - entry = &PodGroupCache{ - Name: groupName, - Size: groupSize, - Nodes: nodes, - TimeCreated: metav1.NowMicro(), - } - - // Tell the user when it was created - klog.Infof("[Fluence] Pod group %s was created at %s\n", entry.Name, entry.TimeCreated) - } - - // If the size has changed, we currently do not allow updating it. - // We issue a warning. In the future this could be supported with a grow command. - if entry.Size != groupSize { - klog.Infof("[Fluence] Pod group %s request to change size from %s to %s is not yet supported\n", groupName, entry.Size, groupSize) - // entry.GroupSize = groupSize - } - podGroupCache[groupName] = entry - return nil + groupsSeen = map[string]*PodGroupCache{} } -// GetPodGroup gets a pod group in the cache by name -func GetPodGroup(groupName string) *PodGroupCache { - entry, _ := podGroupCache[groupName] +// GetFluenceCache determines if a group has been seen. 
+// Yes -> we return the PodGroupCache entry +// No -> the entry is nil / does not exist +func GetFluenceCache(groupName string) *PodGroupCache { + entry, _ := groupsSeen[groupName] return entry } // DeletePodGroup deletes a pod from the group cache func DeletePodGroup(groupName string) { - delete(podGroupCache, groupName) -} - -// ListGroups lists groups, primarily for debugging -func ListGroups() { - for name, pg := range podGroupCache { - fmt.Printf(" %s: size %s, created at %s\n", name, pg.Size, &pg.TimeCreated) - } + delete(groupsSeen, groupName) } // CreateNodePodsList creates a list of node pod caches -func CreateNodePodsList(nodelist []*pb.NodeAlloc, groupName string) (nodepods []NodeCache) { +func CreateNodeList(nodelist []*pb.NodeAlloc, groupName string) (nodepods []NodeCache) { // Create a pod cache for each node nodepods = make([]NodeCache, len(nodelist)) + // TODO: should we be integrating topology information here? Could it be the + // case that some nodes (pods) in the group should be closer? for i, v := range nodelist { nodepods[i] = NodeCache{ - NodeName: v.GetNodeID(), - Tasks: int(v.GetTasks()), + NodeName: v.GetNodeID(), + AssignedTasks: int(v.GetTasks()), + GroupName: groupName, } } - // Update the pods in the PodGraphCache - updatePodGroupNodes(groupName, nodepods) - klog.Infof("[Fluence] Pod group cache updated with nodes\n", podGroupCache) + // Update the pods in the PodGroupCache (groupsSeen) + updatePodGroupCache(groupName, nodepods) return nodepods } // updatePodGroupList updates the PodGroupCache with a listing of nodes -func updatePodGroupNodes(groupName string, nodes []NodeCache) { - group := podGroupCache[groupName] - group.Nodes = nodes - podGroupCache[groupName] = group +func updatePodGroupCache(groupName string, nodes []NodeCache) { + cache := PodGroupCache{ + Nodes: nodes, + GroupName: groupName, + } + groupsSeen[groupName] = &cache } -// HavePodNodes returns true if the listing of pods is not empty -// This should be all pods that are needed - the allocation will not -// be successful otherwise, so we just check > 0 -func (p *PodGroupCache) HavePodNodes() bool { - return len(p.Nodes) > 0 -} +// GetNextNode gets the next node in the PodGroupCache +func (p *PodGroupCache) GetNextNode() (string, error) { -// CancelAllocation resets the node cache and allocation status -func (p *PodGroupCache) CancelAllocation() { - p.Nodes = []NodeCache{} -} + nextnode := "" -// GetNextNode gets the next available node we can allocate for a group -func GetNextNode(groupName string) (string, error) { - entry, ok := podGroupCache[groupName] - if !ok { - return "", fmt.Errorf("[Fluence] Map is empty\n") - } - if len(entry.Nodes) == 0 { - return "", fmt.Errorf("[Fluence] Error while getting a node\n") + // Quick failure state - we ran out of nodes + if len(p.Nodes) == 0 { + return nextnode, fmt.Errorf("[Fluence] PodGroup %s ran out of nodes.", p.GroupName) } - nodename := entry.Nodes[0].NodeName - klog.Infof("[Fluence] Next node for group %s is %s", groupName, nodename) + // The next is the 0th in the list + nextnode = p.Nodes[0].NodeName + klog.Infof("[Fluence] Next node for group %s is %s", p.GroupName, nextnode) - if entry.Nodes[0].Tasks == 1 { - klog.Infof("[Fluence] First node has one task") - slice := entry.Nodes[1:] + // If there is only one task left, we are going to use it (and remove the node) + if p.Nodes[0].AssignedTasks == 1 { + klog.Infof("[Fluence] First node has one remaining task slot") + slice := p.Nodes[1:] + + // If after we remove the node there are no 
nodes left... + // Note that I'm not deleting the node from the cache because that is the + // only way fluence knows it has already assigned work (presence of the key) if len(slice) == 0 { - klog.Infof("[Fluence] After this node, the slice is empty, deleting group %s from cache\n", groupName) - delete(podGroupCache, groupName) - return nodename, nil + klog.Infof("[Fluence] Assigning node %s. There are NO remaining nodes for group %s\n", nextnode, p.GroupName) + // delete(podGroupCache, groupName) + return nextnode, nil } - klog.Infof("[Fluence] After this node, the slide still has nodes") - updatePodGroupNodes(groupName, slice) - return nodename, nil + + klog.Infof("[Fluence] Assigning node %s. There are nodes left for group %s", nextnode, p.GroupName) + updatePodGroupCache(p.GroupName, slice) + return nextnode, nil + } + + // If we get here the first node had >1 assigned tasks + klog.Infof("[Fluence] Assigning node %s for group %s. There are still task assignments available for this node.", nextnode, p.GroupName) + p.Nodes[0].AssignedTasks = p.Nodes[0].AssignedTasks - 1 + return nextnode, nil +} + +// GetNextNode gets the next available node we can allocate for a group +// TODO this should be able to take and pass forward a number of tasks. +// It is implicitly 1 now, but doesn't have to be. +func GetNextNode(groupName string) (string, error) { + + // Get our entry from the groupsSeen cache + klog.Infof("[Fluence] groups seen %s", groupsSeen) + entry, ok := groupsSeen[groupName] + + // This case should not happen + if !ok { + return "", fmt.Errorf("[Fluence] Map is empty") } - klog.Infof("[Fluence] Subtracting one task from first node") - entry.Nodes[0].Tasks = entry.Nodes[0].Tasks - 1 - return nodename, nil + // Get the next node from the PodGroupCache + return entry.GetNextNode() } diff --git a/sig-scheduler-plugins/pkg/fluence/events.go b/sig-scheduler-plugins/pkg/fluence/events.go new file mode 100644 index 0000000..bc265f7 --- /dev/null +++ b/sig-scheduler-plugins/pkg/fluence/events.go @@ -0,0 +1,150 @@ +package fluence + +import ( + "context" + "time" + + "google.golang.org/grpc" + v1 "k8s.io/api/core/v1" + klog "k8s.io/klog/v2" + + pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" +) + +// Events are associated with informers, typically on pods, e.g., +// delete: deletion of a pod +// update: update of a pod! +// For both of the above, there are cases to cancel the flux job +// associated with the group id + +// cancelFluxJob cancels the flux job for a pod group.
+// We assume that the cancelled job also means deleting the pod group +func (f *Fluence) cancelFluxJob(groupName string) error { + + jobid, ok := f.groupToJobId[groupName] + + // The job was already cancelled by another pod + if !ok { + klog.Infof("[Fluence] Request for cancel of group %s is already complete.", groupName) + return nil + } + klog.Infof("[Fluence] Cancel flux job: %v for group %s", jobid, groupName) + + // This first error is about connecting to the server + conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) + if err != nil { + klog.Errorf("[Fluence] Error connecting to server: %v", err) + return err + } + defer conn.Close() + + grpcclient := pb.NewFluxcliServiceClient(conn) + _, cancel := context.WithTimeout(context.Background(), 200*time.Second) + defer cancel() + + // This error reflects the success or failure of the cancel request + request := &pb.CancelRequest{JobID: int64(jobid)} + res, err := grpcclient.Cancel(context.Background(), request) + if err != nil { + klog.Errorf("[Fluence] did not receive any cancel response: %v", err) + return err + } + klog.Infof("[Fluence] Job cancellation for group %s result: %d", groupName, res.Error) + + // And this error is if the cancel was successful or not + if res.Error == 0 { + klog.Infof("[Fluence] Successful cancel of flux job: %d for group %s", jobid, groupName) + delete(f.groupToJobId, groupName) + } else { + klog.Warningf("[Fluence] Failed to cancel flux job %d for group %s", jobid, groupName) + } + return nil +} + +// updatePod is called on an update, and the old and new object are presented +func (f *Fluence) updatePod(oldObj, newObj interface{}) { + + oldPod := oldObj.(*v1.Pod) + newPod := newObj.(*v1.Pod) + + // a pod is updated, get the group + // TODO should we be checking group / size for old vs new? + groupName, _ := f.pgMgr.GetPodGroup(context.TODO(), oldPod) + + klog.Infof("[Fluence] Processing event for pod %s in group %s from %s to %s", newPod.Name, groupName, newPod.Status.Phase, oldPod.Status.Phase) + + switch newPod.Status.Phase { + case v1.PodPending: + // in this state we don't know if a pod is going to be running, thus we don't need to update job map + case v1.PodRunning: + // if a pod is start running, we can add it state to the delta graph if it is scheduled by other scheduler + case v1.PodSucceeded: + klog.Infof("[Fluence] Pod %s succeeded, Fluence needs to free the resources", newPod.Name) + + f.mutex.Lock() + defer f.mutex.Unlock() + + // Do we have the group id in our cache? 
If yes, we haven't deleted the jobid yet + // I am worried here that if some pods are succeeded and others pending, this could + // be a mistake - fluence would schedule it again + _, ok := f.groupToJobId[groupName] + if ok { + f.cancelFluxJob(groupName) + } else { + klog.Infof("[Fluence] Succeeded pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) + } + + case v1.PodFailed: + + // a corner case need to be tested, the pod exit code is not 0, can be created with segmentation fault pi test + klog.Warningf("[Fluence] Pod %s in group %s failed, Fluence needs to free the resources", newPod.Name, groupName) + + f.mutex.Lock() + defer f.mutex.Unlock() + + _, ok := f.groupToJobId[groupName] + if ok { + f.cancelFluxJob(groupName) + } else { + klog.Errorf("[Fluence] Failed pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) + } + case v1.PodUnknown: + // don't know how to deal with it as it's unknown phase + default: + // shouldn't enter this branch + } +} + +// deletePod handles the delete event handler +func (f *Fluence) deletePod(podObj interface{}) { + klog.Info("[Fluence] Delete Pod event handler") + pod := podObj.(*v1.Pod) + groupName, _ := f.pgMgr.GetPodGroup(context.TODO(), pod) + + klog.Infof("[Fluence] Delete pod %s in group %s has status %s", pod.Status.Phase, pod.Name, groupName) + switch pod.Status.Phase { + case v1.PodSucceeded: + case v1.PodPending: + klog.Infof("[Fluence] Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) + + f.mutex.Lock() + defer f.mutex.Unlock() + + _, ok := f.groupToJobId[groupName] + if ok { + f.cancelFluxJob(groupName) + } else { + klog.Infof("[Fluence] Terminating pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) + } + case v1.PodRunning: + f.mutex.Lock() + defer f.mutex.Unlock() + + _, ok := f.groupToJobId[groupName] + if ok { + f.cancelFluxJob(groupName) + } else { + klog.Infof("[Fluence] Deleted pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) + } + } +} diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 26282e5..0e8ec21 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -1,19 +1,3 @@ -/* -Copyright 2022 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package fluence import ( @@ -32,7 +16,7 @@ import ( clientscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/cache" corev1helpers "k8s.io/component-helpers/scheduling/corev1" - "k8s.io/klog/v2" + klog "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" @@ -49,12 +33,9 @@ type Fluence struct { handle framework.Handle client client.Client - // Important: I tested moving this into the group, but it's a bad idea because - // we need to delete the group after the last allocation is given, and then we - // no longer have the ID. 
It might be a better approach to delete it elsewhere - // (but I'm not sure where that elsewhere could be) - podNameToJobId map[string]uint64 - pgMgr coschedulingcore.Manager + // Store jobid on the level of a group (which can be a single pod) + groupToJobId map[string]uint64 + pgMgr coschedulingcore.Manager } // Name is the name of the plugin used in the Registry and configurations. @@ -79,7 +60,7 @@ func (f *Fluence) Name() string { // https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/pkg/coscheduling/coscheduling.go#L63 func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { - f := &Fluence{handle: handle, podNameToJobId: make(map[string]uint64)} + f := &Fluence{handle: handle, groupToJobId: make(map[string]uint64)} ctx := context.TODO() fcore.Init() @@ -106,7 +87,7 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { fieldSelector, err := fields.ParseSelector(",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)) if err != nil { - klog.ErrorS(err, "ParseSelector failed") + klog.Errorf("ParseSelector failed %s", err) os.Exit(1) } @@ -116,6 +97,7 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { podInformer := informerFactory.Core().V1().Pods() scheduleTimeDuration := time.Duration(500) * time.Second + // https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/pkg/coscheduling/core/core.go#L84 pgMgr := coschedulingcore.NewPodGroupManager( k8scli, handle.SnapshotSharedLister(), @@ -141,34 +123,27 @@ func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { // Less is used to sort pods in the scheduling queue in the following order. // 1. Compare the priorities of Pods. -// 2. Compare the initialization timestamps of fluence pod groups -// 3. Fall back, sort by namespace/name -// See https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/ -// Less is part of Sort, which is the earliest we can see a pod unless we use gate -// IMPORTANT: Less sometimes is not called for smaller sizes, not sure why. -// To get around this we call it during PreFilter too. +// 2. Compare the initialization timestamps of PodGroups or Pods. +// 3. Compare the keys of PodGroups/Pods: /. func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { - klog.Infof("[Fluence] Ordering pods in Less") - - // ensure we have a PodGroup no matter what - klog.Infof("[Fluence] Comparing %s and %s", podInfo1.Pod.Name, podInfo2.Pod.Name) - podGroup1 := fgroup.EnsureFluenceGroup(podInfo1.Pod) - podGroup2 := fgroup.EnsureFluenceGroup(podInfo2.Pod) - - // First preference to priority, but only if they are different + klog.Infof("ordering pods in fluence scheduler plugin") prio1 := corev1helpers.PodPriority(podInfo1.Pod) prio2 := corev1helpers.PodPriority(podInfo2.Pod) - - // ...and only allow this to sort if they aren't the same - // The assumption here is that pods with priority are ignored by fluence if prio1 != prio2 { return prio1 > prio2 } + // Important: this GetPodGroup returns the first name as the Namespaced one, + // which is what fluence needs to distinguish between namespaces. Just the + // name could be replicated between different namespaces + ctx := context.TODO() + name1, podGroup1 := f.pgMgr.GetPodGroup(ctx, podInfo1.Pod) + name2, podGroup2 := f.pgMgr.GetPodGroup(ctx, podInfo2.Pod) + // Fluence can only compare if we have two known groups. 
// This tries for that first, and falls back to the initial attempt timestamp - creationTime1 := fgroup.GetCreationTimestamp(podGroup1, podInfo1) - creationTime2 := fgroup.GetCreationTimestamp(podGroup2, podInfo2) + creationTime1 := fgroup.GetCreationTimestamp(name1, podGroup1, podInfo1) + creationTime2 := fgroup.GetCreationTimestamp(name2, podGroup2, podInfo2) // If they are the same, fall back to sorting by name. if creationTime1.Equal(&creationTime2) { @@ -178,7 +153,7 @@ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { } // PreFilter checks info about the Pod / checks conditions that the cluster or the Pod must meet. -// This still comes after sort +// This comes after sort func (f *Fluence) PreFilter( ctx context.Context, state *framework.CycleState, @@ -189,31 +164,46 @@ func (f *Fluence) PreFilter( // groupName will be named according to the single pod namespace / pod if there wasn't // a user defined group. This is a size 1 group we handle equivalently. - pg := fgroup.GetPodsGroup(pod) + groupName, pg := f.pgMgr.GetPodGroup(ctx, pod) + klog.Infof("[Fluence] Pod %s is in group %s with minimum members %d", pod.Name, groupName, pg.Spec.MinMember) - klog.Infof("[Fluence] Pod %s group size %d", pod.Name, pg.Size) - klog.Infof("[Fluence] Pod %s group name is %s", pod.Name, pg.Name) + // Has this podgroup been seen by fluence yet? If yes, we will have it in the cache + cache := fcore.GetFluenceCache(groupName) + klog.Infof("[Fluence] cache %s", cache) - // Note that it is always the case we have a group - // We have not yet derived a node list - if !pg.HavePodNodes() { - klog.Infof("[Fluence] Does not have nodes yet, asking Fluxion") - err := f.AskFlux(ctx, pod, int(pg.Size)) + // Fluence has never seen this before, we need to schedule an allocation + // It also could have been seen, but was not able to get one. + if cache == nil { + klog.Infof("[Fluence] Does not have nodes for %s yet, asking Fluxion", groupName) + + // groupName is the namespaced name / + err := f.AskFlux(ctx, pod, pg, groupName) if err != nil { klog.Infof("[Fluence] Fluxion returned an error %s, not schedulable", err.Error()) return nil, framework.NewStatus(framework.Unschedulable, err.Error()) } } - nodename, err := fcore.GetNextNode(pg.Name) - klog.Infof("Node Selected %s (%s:%s)", nodename, pod.Name, pg.Name) + + // We can only get here if an allocation is done (and there is no error above) + // The cache would only originally be nil if we didn't do that yet. It should + // always be defined (not nil) when we get here + cache = fcore.GetFluenceCache(groupName) + + // This is the next node in the list + nodename, err := fcore.GetNextNode(groupName) if err != nil { return nil, framework.NewStatus(framework.Unschedulable, err.Error()) } - - // Create a fluxState (CycleState) with things that might be useful/ - klog.Info("Node Selected: ", nodename) - cache := fcore.NodeCache{NodeName: nodename} - state.Write(framework.StateKey(pod.Name), &fcore.FluxStateData{NodeCache: cache}) + klog.Infof("Node Selected %s (pod %s:group %s)", nodename, pod.Name, groupName) + + // Create a fluxState (CycleState) with things that might be useful + // This isn't a PodGroupCache, but a single node cache, which also + // has group information, but just is for one node. 
Note that assigned + // tasks is hard coded to 1 but this isn't necessarily the case - we should + // eventually be able to GetNextNode for a number of tasks, for example + // (unless task == pod in which case it is always 1) + nodeCache := fcore.NodeCache{NodeName: nodename, GroupName: groupName, AssignedTasks: 1} + state.Write(framework.StateKey(pod.Name), &fcore.FluxStateData{NodeCache: nodeCache}) return nil, framework.NewStatus(framework.Success, "") } @@ -226,8 +216,16 @@ func (f *Fluence) Filter( ) *framework.Status { klog.Info("Filtering input node ", nodeInfo.Node().Name) - if v, e := cycleState.Read(framework.StateKey(pod.Name)); e == nil { - if value, ok := v.(*fcore.FluxStateData); ok && value.NodeCache.NodeName != nodeInfo.Node().Name { + state, err := cycleState.Read(framework.StateKey(pod.Name)) + + // No error means we retrieved the state + if err == nil { + + // Try to convert the state to FluxStateDate + value, ok := state.(*fcore.FluxStateData) + + // If we have state data that isn't equal to the current assignment, no go + if ok && value.NodeCache.NodeName != nodeInfo.Node().Name { return framework.NewStatus(framework.Unschedulable, "pod is not permitted") } else { klog.Infof("Filter: node %s selected for %s\n", value.NodeCache.NodeName, pod.Name) @@ -243,24 +241,33 @@ func (f *Fluence) PreFilterExtensions() framework.PreFilterExtensions { } // AskFlux will ask flux for an allocation for nodes for the pod group. -func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { +func (f *Fluence) AskFlux( + ctx context.Context, + pod *v1.Pod, + pg *sched.PodGroup, + groupName string, +) error { + // clean up previous match if a pod has already allocated previously f.mutex.Lock() - _, isPodAllocated := f.podNameToJobId[pod.Name] + _, isAllocated := f.groupToJobId[groupName] f.mutex.Unlock() - if isPodAllocated { - klog.Infof("[Fluence] Pod %s is allocated, cleaning up previous allocation\n", pod.Name) - f.mutex.Lock() - f.cancelFluxJobForPod(pod) - f.mutex.Unlock() + // Not allowing cancel for now - not sure how or why we could do this, need to better + // understand the case. This function should ONLY be successful on a new match allocate, + // otherwise the calling logic does not make sense. + if isAllocated { + return fmt.Errorf("[Fluence] Pod %s in group %s is allocated and calling AskFlux, should we be here?\n", pod.Name, groupName) } - // Does the task name here matter? We are naming the entire group for the pod - jobspec := utils.InspectPodInfo(pod) + // IMPORTANT: this is a JobSpec for *one* pod, assuming they are all the same. + // This obviously may not be true if we have a hetereogenous PodGroup. + // We name it based on the group, since it will represent the group + jobspec := utils.PreparePodJobSpec(pod, groupName) klog.Infof("[Fluence] Inspect pod info, jobspec: %s\n", jobspec) conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) + // TODO change this to just return fmt.Errorf if err != nil { klog.Errorf("[Fluence] Error connecting to server: %v\n", err) return err @@ -274,154 +281,34 @@ func (f *Fluence) AskFlux(ctx context.Context, pod *v1.Pod, count int) error { request := &pb.MatchRequest{ Ps: jobspec, Request: "allocate", - Count: int32(count)} + Count: pg.Spec.MinMember, + } - // Question from vsoch; Why return err instead of err2 here? 
- // err would return a nil value, but we need to return non nil, - // otherwise it's going to try to use the allocation (but there is none) + // An error here is an error with making the request r, err := grpcclient.Match(context.Background(), request) if err != nil { klog.Errorf("[Fluence] did not receive any match response: %v\n", err) return err } - klog.Infof("[Fluence] response podID %s\n", r.GetPodID()) - - // Presence of a podGroup is indicated by a groupName - // Flag that the group is allocated (yes we also have the job id, testing for now) - pg := fgroup.GetPodsGroup(pod) + // TODO GetPodID should be renamed, because it will reflect the group + klog.Infof("[Fluence] Match response ID %s\n", r.GetPodID()) // Get the nodelist and inspect nodes := r.GetNodelist() klog.Infof("[Fluence] Nodelist returned from Fluxion: %s\n", nodes) - nodelist := fcore.CreateNodePodsList(nodes, pg.Name) - klog.Infof("[Fluence] parsed node pods list %s\n", nodelist) + // Assign the nodelist - this sets the group name in the groupSeen cache + // at this point, we can retrieve the cache and get nodes + nodelist := fcore.CreateNodeList(nodes, groupName) + jobid := uint64(r.GetJobID()) + klog.Infof("[Fluence] parsed node pods list %s for job id %d\n", nodelist, jobid) + // TODO would be nice to actually be able to ask flux jobs -a to fluence + // That way we can verify assignments, etc. f.mutex.Lock() - f.podNameToJobId[pod.Name] = jobid - klog.Infof("[Fluence] Check job assignment: %s\n", f.podNameToJobId) + f.groupToJobId[groupName] = jobid f.mutex.Unlock() return nil } - -// cancelFluxJobForPod cancels the flux job for a pod. -// We assume that the cancelled job also means deleting the pod group -func (f *Fluence) cancelFluxJobForPod(pod *v1.Pod) error { - jobid := f.podNameToJobId[pod.Name] - - klog.Infof("[Fluence] Cancel flux job: %v for pod %s", jobid, pod.Name) - - conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) - - if err != nil { - klog.Errorf("[Fluence] Error connecting to server: %v", err) - return err - } - defer conn.Close() - - grpcclient := pb.NewFluxcliServiceClient(conn) - _, cancel := context.WithTimeout(context.Background(), 200*time.Second) - defer cancel() - - // I think this error reflects the success or failure of the cancel request - request := &pb.CancelRequest{JobID: int64(jobid)} - res, err := grpcclient.Cancel(context.Background(), request) - if err != nil { - klog.Errorf("[Fluence] did not receive any cancel response: %v", err) - return err - } - klog.Infof("[Fluence] Job cancellation for pod %s result: %d", pod.Name, res.Error) - - // And this error is if the cancel was successful or not - if res.Error == 0 { - klog.Infof("[Fluence] Successful cancel of flux job: %v for pod %s", jobid, pod.Name) - delete(f.podNameToJobId, pod.Name) - - // If we are successful, clear the group allocated nodes - fgroup.DeleteFluenceGroup(pod) - } else { - klog.Warningf("[Fluence] Failed to cancel flux job %v for pod %s", jobid, pod.Name) - } - return nil -} - -// EventHandlers updatePod handles cleaning up resources -func (f *Fluence) updatePod(oldObj, newObj interface{}) { - - oldPod := oldObj.(*v1.Pod) - newPod := newObj.(*v1.Pod) - - klog.Infof("[Fluence] Processing event for pod %s from %s to %s", newPod.Name, newPod.Status.Phase, oldPod.Status.Phase) - - switch newPod.Status.Phase { - case v1.PodPending: - // in this state we don't know if a pod is going to be running, thus we don't need to update job map - case v1.PodRunning: - // if a pod is start running, we can add it 
state to the delta graph if it is scheduled by other scheduler - case v1.PodSucceeded: - klog.Infof("[Fluence] Pod %s succeeded, Fluence needs to free the resources", newPod.Name) - - f.mutex.Lock() - defer f.mutex.Unlock() - - if _, ok := f.podNameToJobId[newPod.Name]; ok { - f.cancelFluxJobForPod(newPod) - } else { - klog.Infof("[Fluence] Succeeded pod %s/%s doesn't have flux jobid", newPod.Namespace, newPod.Name) - } - case v1.PodFailed: - // a corner case need to be tested, the pod exit code is not 0, can be created with segmentation fault pi test - klog.Warningf("[Fluence] Pod %s failed, Fluence needs to free the resources", newPod.Name) - - f.mutex.Lock() - defer f.mutex.Unlock() - - if _, ok := f.podNameToJobId[newPod.Name]; ok { - f.cancelFluxJobForPod(newPod) - } else { - klog.Errorf("[Fluence] Failed pod %s/%s doesn't have flux jobid", newPod.Namespace, newPod.Name) - } - case v1.PodUnknown: - // don't know how to deal with it as it's unknown phase - default: - // shouldn't enter this branch - } -} - -// deletePod handles the delete event handler -// TODO when should we clear group from the cache? -func (f *Fluence) deletePod(podObj interface{}) { - klog.Info("[Fluence] Delete Pod event handler") - - pod := podObj.(*v1.Pod) - klog.Infof("[Fluence] Delete pod has status %s", pod.Status.Phase) - switch pod.Status.Phase { - case v1.PodSucceeded: - case v1.PodPending: - klog.Infof("[Fluence] Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) - - f.mutex.Lock() - defer f.mutex.Unlock() - - if _, ok := f.podNameToJobId[pod.Name]; ok { - f.cancelFluxJobForPod(pod) - } else { - klog.Infof("[Fluence] Terminating pod %s/%s doesn't have flux jobid", pod.Namespace, pod.Name) - } - case v1.PodRunning: - f.mutex.Lock() - defer f.mutex.Unlock() - - if _, ok := f.podNameToJobId[pod.Name]; ok { - f.cancelFluxJobForPod(pod) - } else { - klog.Infof("[Fluence] Deleted pod %s/%s doesn't have flux jobid", pod.Namespace, pod.Name) - } - } - - // We assume that a request to delete one pod means all of them. - // We have to take an all or nothing approach for now - fgroup.DeleteFluenceGroup(pod) -} diff --git a/sig-scheduler-plugins/pkg/fluence/group/group.go b/sig-scheduler-plugins/pkg/fluence/group/group.go index 4af84e2..455b9e5 100644 --- a/sig-scheduler-plugins/pkg/fluence/group/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group/group.go @@ -1,103 +1,23 @@ package group import ( - "fmt" - "strconv" - - v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/klog/v2" + klog "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" - fcore "sigs.k8s.io/scheduler-plugins/pkg/fluence/core" - "sigs.k8s.io/scheduler-plugins/pkg/fluence/labels" + sched "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" ) -// getDefaultGroupName returns a group name based on the pod namespace and name -// We could do this for pods that are not labeled, and treat them as a size 1 group -func getDefaultGroupName(pod *v1.Pod) string { - return fmt.Sprintf("%s-%s", pod.Namespace, pod.Name) -} - -// getPodsGroup gets the pods group, if it exists. 
-func GetPodsGroup(pod *v1.Pod) *fcore.PodGroupCache { - groupName := EnsureFluenceGroup(pod) - return fcore.GetPodGroup(groupName) -} - -// GetGroup is a courtesy wrapper around fcore.GetPodGroup -func GetGroup(groupName string) *fcore.PodGroupCache { - return fcore.GetPodGroup(groupName) -} - -// ensureFluenceGroup ensure that a podGroup is created for the named fluence group -// Preference goes to the traditional PodGroup (created by the user) -// and falls back to having one created by fluence. If there is no PodGroup -// created and no fluence annotation, we do not create the group. -// Likely for fluence we'd want a cleanup function somehow too, -// for now assume groups are unique by name. -func EnsureFluenceGroup(pod *v1.Pod) string { - - // Get the group name and size from the fluence labels - groupName := getFluenceGroupName(pod) - groupSize := getFluenceGroupSize(pod) - - // If there isn't a group, make a single node sized group - // This is so we can always treat the cases equally - if groupName == "" { - klog.Infof("[Fluence] Group annotation missing for pod %s", pod.Name) - groupName = getDefaultGroupName(pod) - } - klog.Infof("[Fluence] Group name for %s is %s", pod.Name, groupName) - klog.Infof("[Fluence] Group size for %s is %d", pod.Name, groupSize) - - // Register the pod group (with the pod) in our cache - fcore.RegisterPodGroup(pod, groupName, groupSize) - return groupName -} - -// deleteFluenceGroup ensures the pod group is deleted, if it exists -func DeleteFluenceGroup(pod *v1.Pod) { - // Get the group name and size from the fluence labels - pg := GetPodsGroup(pod) - fcore.DeletePodGroup(pg.Name) - klog.Infof("[Fluence] known groups are:\n") - fcore.ListGroups() -} - -// getFluenceGroupName looks for the group to indicate a fluence group, and returns it -func getFluenceGroupName(pod *v1.Pod) string { - groupName, _ := pod.Labels[labels.PodGroupLabel] - return groupName -} - -// getFluenceGroupSize gets the size of the fluence group -func getFluenceGroupSize(pod *v1.Pod) int32 { - size, _ := pod.Labels[labels.PodGroupSizeLabel] - - // Default size of 1 if the label is not set (but name is) - if size == "" { - return 1 - } - - // We don't want the scheduler to fail if someone puts a value for size - // that doesn't convert nicely. They can find this in the logs. - intSize, err := strconv.ParseUint(size, 10, 32) - if err != nil { - klog.Error(" [Fluence] Parsing integer size for pod group") - } - return int32(intSize) -} - // GetCreationTimestamp first tries the fluence group, then falls back to the initial attempt timestamp -func GetCreationTimestamp(groupName string, podInfo *framework.QueuedPodInfo) metav1.MicroTime { - pg := fcore.GetPodGroup(groupName) +// This is the only update we have made to the upstream PodGroupManager, because we are expecting +// a MicroTime and not a time.Time. +func GetCreationTimestamp(groupName string, pg *sched.PodGroup, podInfo *framework.QueuedPodInfo) metav1.MicroTime { // IsZero is an indicator if this was actually set // If the group label was present and we have a group, this will be true - if !pg.TimeCreated.IsZero() { - klog.Infof(" [Fluence] Pod group %s was created at %s\n", groupName, pg.TimeCreated) - return pg.TimeCreated + if !pg.Status.ScheduleStartTime.IsZero() { + klog.Infof(" [Fluence] Pod group %s was created at %s\n", groupName, pg.Status.ScheduleStartTime) + return pg.Status.ScheduleStartTime } // We should actually never get here. 
klog.Errorf(" [Fluence] Pod group %s time IsZero, we should not have reached here", groupName) diff --git a/sig-scheduler-plugins/pkg/fluence/utils/utils.go b/sig-scheduler-plugins/pkg/fluence/utils/utils.go index e384669..f2969d2 100644 --- a/sig-scheduler-plugins/pkg/fluence/utils/utils.go +++ b/sig-scheduler-plugins/pkg/fluence/utils/utils.go @@ -21,7 +21,7 @@ import ( "strings" v1 "k8s.io/api/core/v1" - "k8s.io/klog/v2" + klog "k8s.io/klog/v2" pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" ) @@ -39,12 +39,14 @@ func getPodJobspecLabels(pod *v1.Pod) []string { return labels } -// InspectPodInfo takes a pod object and returns the pod.spec -// Note from vsoch - I updated this to calculate containers across the pod -// if that's wrong we can change it back. -func InspectPodInfo(pod *v1.Pod) *pb.PodSpec { +// PreparePodJobSpec takes a pod object and returns the jobspec +// The jobspec is based on the pod, and assumes it will be duplicated +// for a MatchAllocate request (representing all pods). We name the +// jobspec based on the group and not the individual ID. +// This calculates across containers in the od +func PreparePodJobSpec(pod *v1.Pod, groupName string) *pb.PodSpec { ps := new(pb.PodSpec) - ps.Id = pod.Name + ps.Id = groupName // Note from vsoch - there was an if check here to see if we had labels, // I don't think there is risk to adding an empty list but we can add diff --git a/src/fluence/fluxion/fluxion.go b/src/fluence/fluxion/fluxion.go index 5775199..05e94fa 100644 --- a/src/fluence/fluxion/fluxion.go +++ b/src/fluence/fluxion/fluxion.go @@ -8,7 +8,7 @@ import ( "github.com/flux-framework/flux-k8s/flux-plugin/fluence/jobspec" "github.com/flux-framework/flux-k8s/flux-plugin/fluence/utils" "github.com/flux-framework/fluxion-go/pkg/fluxcli" - "k8s.io/klog/v2" + klog "k8s.io/klog/v2" "context" "errors" From 8e0b4613f574521a0a4d75b0c49878656e04ef7a Mon Sep 17 00:00:00 2001 From: vsoch Date: Sun, 18 Feb 2024 21:40:05 -0700 Subject: [PATCH 16/28] feat: podgroup deletion when finished/failed Problem: Since the PodGroup controller creates the PodGroup, it should delete it as well. Solution: Ideally I wanted to attach an owner reference, meaning that the top level job (that also owns the pod) would be owner to the PodGroup. But that does not seem to take - either because the controller is the owner or the field is read only for k8s. For the time being, I decided to delete the PodGroup when the group is determined to be Finished/Failed, which happens when that number of pods equals or exceeds the MinimumSize. I think granted that MinimumSize == size this should be OK with fluence, and we might need to consider other approaches if/when the min size is smaller than the total size (because fluence might still see a pod in the queue and try to schedule again. I think what we might do in that case is just update the MinSize for the group, so if fluence schedules again it will be for the smaller size. But not sure about that either! TBA. The important thing now is that the pod group cleans itself up! 
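One way to watch this cleanup in practice, assuming the PodGroup CRD is registered under the upstream `scheduling.x-k8s.io` API group (as the labels used earlier suggest), is to follow the PodGroup objects while a fluence job runs:

```bash
# Watch PodGroups appear, move through phases, and get deleted once Finished or Failed
kubectl get podgroups.scheduling.x-k8s.io --all-namespaces --watch

# Dump full status (phase, MinMember, running/succeeded/failed counts) for the groups that exist
kubectl get podgroups.scheduling.x-k8s.io -o yaml
```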
Signed-off-by: vsoch --- examples/test_example/fluence-sized-job.yaml | 16 ++++ .../pkg/controllers/podgroup_controller.go | 89 +++++++++++++------ 2 files changed, 80 insertions(+), 25 deletions(-) create mode 100644 examples/test_example/fluence-sized-job.yaml diff --git a/examples/test_example/fluence-sized-job.yaml b/examples/test_example/fluence-sized-job.yaml new file mode 100644 index 0000000..a195d87 --- /dev/null +++ b/examples/test_example/fluence-sized-job.yaml @@ -0,0 +1,16 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: fluence-sized-job +spec: + parallelism: 3 + completions: 3 + template: + spec: + schedulerName: fluence + containers: + - name: fluence-job + image: busybox + command: [echo, potato] + restartPolicy: Never + backoffLimit: 4 diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go index 72bda77..fa4593c 100644 --- a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go +++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go @@ -125,60 +125,81 @@ func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c // to account for if the labels are different, do we take the smallest? log.Info("PodGroup", "Status", fmt.Sprintf("WARNING: Pod group current MinMember %s does not match %d", pg.Spec.MinMember, size)) } + return r.updateStatus(ctx, pg, podList.Items) - // If we get here, we have a PodGroup with a set size and can inspect / update phase - pods := podList.Items - pgCopy := pg.DeepCopy() +} +func (r *PodGroupReconciler) updateStatus( + ctx context.Context, + pg *schedv1alpha1.PodGroup, + pods []v1.Pod, +) (ctrl.Result, error) { - switch pgCopy.Status.Phase { + patch := client.MergeFrom(pg.DeepCopy()) + + switch pg.Status.Phase { case "": - pgCopy.Status.Phase = schedv1alpha1.PodGroupPending + pg.Status.Phase = schedv1alpha1.PodGroupPending + result, err := r.updateOwnerReferences(ctx, pg, &pods[0]) + if result.Requeue || err != nil { + return result, err + } + case schedv1alpha1.PodGroupPending: if len(pods) >= int(pg.Spec.MinMember) { - pgCopy.Status.Phase = schedv1alpha1.PodGroupScheduling - - // Always update owner references to be the first pod - // E.g., if a job owns it, ensure the group is deleted with it - updateOwnerReferences(pgCopy, &pods[0]) + pg.Status.Phase = schedv1alpha1.PodGroupScheduling + result, err := r.updateOwnerReferences(ctx, pg, &pods[0]) + if result.Requeue || err != nil { + return result, err + } } default: // Get updated counts of running, succeeded, and failed pods - pgCopy.Status.Running, pgCopy.Status.Succeeded, pgCopy.Status.Failed = getCurrentPodStats(pods) + running, succeeded, failed := getCurrentPodStats(pods) // If for some reason we weren't pending and now have fewer than min required, flip back to pending if len(pods) < int(pg.Spec.MinMember) { - pgCopy.Status.Phase = schedv1alpha1.PodGroupPending + pg.Status.Phase = schedv1alpha1.PodGroupPending break } // A pod with succeeded + running STILL less than the minimum required is scheduling - if pgCopy.Status.Succeeded+pgCopy.Status.Running < pg.Spec.MinMember { - pgCopy.Status.Phase = schedv1alpha1.PodGroupScheduling + if succeeded+running < pg.Spec.MinMember { + pg.Status.Phase = schedv1alpha1.PodGroupScheduling } // A pod with succeeded + running >= the minimum required is running! 
- if pgCopy.Status.Succeeded+pgCopy.Status.Running >= pg.Spec.MinMember { - pgCopy.Status.Phase = schedv1alpha1.PodGroupRunning + if succeeded+running >= pg.Spec.MinMember { + pg.Status.Phase = schedv1alpha1.PodGroupRunning } // We have non zero failed, and the total of failed, running amd succeeded > min member // Final state of pod group is FAILED womp womp - if pgCopy.Status.Failed != 0 && - pgCopy.Status.Failed+pgCopy.Status.Running+pgCopy.Status.Succeeded >= pg.Spec.MinMember { - pgCopy.Status.Phase = schedv1alpha1.PodGroupFailed + if failed != 0 && failed+running+succeeded >= pg.Spec.MinMember { + pg.Status.Phase = schedv1alpha1.PodGroupFailed } // Finished! This is where we want to get :) // TODO: ideally the owning higher level object deletion will delete here, // but that won't always work for one of pods - need a new strategy - if pgCopy.Status.Succeeded >= pg.Spec.MinMember { - pgCopy.Status.Phase = schedv1alpha1.PodGroupFinished + if succeeded >= pg.Spec.MinMember { + pg.Status.Phase = schedv1alpha1.PodGroupFinished } + pg.Status.Running = running + pg.Status.Failed = failed + pg.Status.Succeeded = succeeded } - // TODO need better handling here of cleanup, etc. This mostly handles status changes - return r.patchPodGroup(ctx, pg, pgCopy) + // Apply the patch to update, or delete if finished + // TODO would be better if owner references took here, so delete on owner deletion + var err error + if pg.Status.Phase == schedv1alpha1.PodGroupFinished || pg.Status.Phase == schedv1alpha1.PodGroupFailed { + err = r.Delete(ctx, pg) + } else { + r.Status().Update(ctx, pg) + err = r.Patch(ctx, pg, patch) + } + return ctrl.Result{Requeue: true}, err } // newPodGroup creates a new podGroup object, capturing the creation time @@ -273,21 +294,37 @@ func getCurrentPodStats(pods []v1.Pod) (int32, int32, int32) { // updateOwnerReferences ensures the group is always owned by the same entity that owns the pod // This ensures that, for example, a job that is wrapping pods is the owner. -func updateOwnerReferences(pg *schedv1alpha1.PodGroup, pod *v1.Pod) { +func (r *PodGroupReconciler) updateOwnerReferences( + ctx context.Context, + pg *schedv1alpha1.PodGroup, + pod *v1.Pod, +) (ctrl.Result, error) { // Case 1: The pod itself doesn't have owner references. YOLO if len(pod.OwnerReferences) == 0 { - return + return ctrl.Result{}, nil } + // Collect owner references for pod group + owners := []metav1.OwnerReference{} var refs []string for _, ownerRef := range pod.OwnerReferences { refs = append(refs, fmt.Sprintf("%s/%s", pod.Namespace, ownerRef.Name)) + owners = append(owners, ownerRef) } + patch := client.MergeFrom(pg.DeepCopy()) if len(refs) != 0 { sort.Strings(refs) pg.Status.OccupiedBy = strings.Join(refs, ",") } + if len(owners) > 0 { + pg.ObjectMeta.OwnerReferences = owners + } + // Apply the patch to update the size + r.Status().Update(ctx, pg) + err := r.Patch(ctx, pg, patch) + return ctrl.Result{Requeue: true}, err + } // SetupWithManager sets up the controller with the Manager. @@ -346,6 +383,8 @@ func (r *PodGroupReconciler) ensurePodGroup(ctx context.Context, obj client.Obje if apierrs.IsNotFound(err) { r.log.Info("Pod: ", "Status", pod.Status.Phase, "Name", pod.Name, "Group", groupName, "Namespace", pod.Namespace, "Action", "Creating PodGroup") + //owner := r.getOwnerMetadata(pod) + // TODO should an owner be set here? Setting to a specific pod seems risky/wrong in case deleted. 
err, _ := r.newPodGroup(ctx, groupName, pod.Namespace, int32(groupSize)) if err != nil { From 68815a5a3059b1f2f6e0dc4190093b012af92d1e Mon Sep 17 00:00:00 2001 From: vsoch Date: Sun, 18 Feb 2024 23:56:36 -0700 Subject: [PATCH 17/28] feat: add support for other abstractions Problem: we need to be able to run deployments, stateful/replica sets and have them handled by fluence. Solution: allow the webhook to create pod groups for them. In the case they are not targeted for fluence (any abstraction) and get into the PreFilter, allow creation of a FauxPodGroup that will simply schedule one job for the pod. We do this twice - in PreFilter and in the events for update/delete. Signed-off-by: vsoch --- README.md | 8 +- .../simple_example/fluence-deployment.yaml | 19 ++ .../simple_example/fluence-replicaset.yaml | 21 ++ .../simple_example/fluence-statefulset.yaml | 21 ++ .../scheduling/v1alpha1/podgroup_webhook.go | 224 ++++++++++++++---- .../mutating-webhook-configuration.yaml | 5 + .../pkg/controllers/podgroup_controller.go | 4 +- sig-scheduler-plugins/pkg/fluence/events.go | 17 +- sig-scheduler-plugins/pkg/fluence/fluence.go | 41 ++-- .../pkg/fluence/group/group.go | 21 ++ 10 files changed, 312 insertions(+), 69 deletions(-) create mode 100644 examples/simple_example/fluence-deployment.yaml create mode 100644 examples/simple_example/fluence-replicaset.yaml create mode 100644 examples/simple_example/fluence-statefulset.yaml diff --git a/README.md b/README.md index 8556dd1..8922078 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,10 @@ spec: backoffLimit: 4 ``` -There is no reason pods with different names or under different abstractions cannot be part of the same group that needs to be scheduled together. +There is no reason pods with different names or under different abstractions cannot be part of the same group that needs to be scheduled together. Also note that: + +- We currently do not allow scheduling to a control plane +- Deployments, StatefulSets, and ReplicaSets can be scheduled and have pod groups created, however the pod groups are not cleaned up as these abstractions are not meant to complete. ### Deploy @@ -520,7 +523,8 @@ kind create cluster --config ./kind-config.yaml #### TODO - Try what [kueue does](https://github.com/kubernetes-sigs/kueue/blob/6d57813a52066dab412735deeeb60ebb0cdb8e8e/cmd/kueue/main.go#L146-L155) to not require cert-manager. 
- - Add other abstraction types to be intercepted (and labeled with sizes) + - Try other strategies for setting owner references (so cleans up when owner deleted) + - When that is done, add tests for deletion of pod group (the current method is not perfect and needs improvement) #### Components diff --git a/examples/simple_example/fluence-deployment.yaml b/examples/simple_example/fluence-deployment.yaml new file mode 100644 index 0000000..9eb6cef --- /dev/null +++ b/examples/simple_example/fluence-deployment.yaml @@ -0,0 +1,19 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: example-deployment +spec: + selector: + matchLabels: + app: example-deployment + replicas: 3 + template: + metadata: + labels: + app: example-deployment + spec: + schedulerName: fluence + containers: + - name: example + image: rockylinux:9 + command: ["sleep", "infinity"] \ No newline at end of file diff --git a/examples/simple_example/fluence-replicaset.yaml b/examples/simple_example/fluence-replicaset.yaml new file mode 100644 index 0000000..f00e826 --- /dev/null +++ b/examples/simple_example/fluence-replicaset.yaml @@ -0,0 +1,21 @@ +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + name: example-replicaset + labels: + app: example-replicaset +spec: + replicas: 3 + selector: + matchLabels: + app: example-replicaset + template: + metadata: + labels: + app: example-replicaset + spec: + schedulerName: fluence + containers: + - name: example + image: rockylinux:9 + command: ["sleep", "infinity"] \ No newline at end of file diff --git a/examples/simple_example/fluence-statefulset.yaml b/examples/simple_example/fluence-statefulset.yaml new file mode 100644 index 0000000..80da82a --- /dev/null +++ b/examples/simple_example/fluence-statefulset.yaml @@ -0,0 +1,21 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: example-statefulset + labels: + app: example-statefulset +spec: + replicas: 3 + selector: + matchLabels: + app: example-statefulset + template: + metadata: + labels: + app: example-statefulset + spec: + schedulerName: fluence + containers: + - name: example + image: rockylinux:9 + command: ["sleep", "infinity"] \ No newline at end of file diff --git a/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go index bc99fe4..c2582f9 100644 --- a/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go +++ b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go @@ -15,6 +15,7 @@ import ( "fmt" "net/http" + appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" runtime "k8s.io/apimachinery/pkg/runtime" @@ -51,83 +52,127 @@ type fluenceWatcher struct { // not be added again). func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admission.Response { - logger.Info("Running webhook handle") + logger.Info("Running webhook handle, determining pod wrapper abstraction...") - // Try for a job first, which would be created before pods job := &batchv1.Job{} err := a.decoder.Decode(req, job) - if err != nil { - - // Assume we operate on the level of pods for now - pod := &corev1.Pod{} - err := a.decoder.Decode(req, pod) - - // Assume it's a pod group or something else. - // We aren't in charge of validating people's pods. 
- // I don't think we should ever hit this case, actually + if err == nil { + err = a.EnsureGroupOnJob(job) + if err != nil { + logger.Error(err, "Issue adding PodGroup to Job") + return admission.Errored(http.StatusBadRequest, err) + } + marshalledJob, err := json.Marshal(job) if err != nil { - return admission.Allowed("Found non-pod, non-job, this webhook does not validate beyond those.") + logger.Error(err, "Marshalling job error.") + return admission.Errored(http.StatusInternalServerError, err) } + logger.Info("Admission job success.") + return admission.PatchResponseFromRaw(req.Object.Raw, marshalledJob) + } - // If we get here, we decoded a pod + pod := &corev1.Pod{} + err = a.decoder.Decode(req, pod) + if err == nil { err = a.EnsureGroup(pod) if err != nil { - logger.Error(err, "Issue adding PodGroup to pod.") + logger.Error(err, "Issue adding PodGroup to Pod") return admission.Errored(http.StatusBadRequest, err) } - - // Send the updated pod to the events channel - //*a.events <- event.GenericEvent{Object: pod} - logger.Info("Admission pod success.") - marshalledPod, err := json.Marshal(pod) if err != nil { - logger.Error(err, "Marshalling pod error.") + logger.Error(err, "Marshalling pod error") return admission.Errored(http.StatusInternalServerError, err) } - - logger.Info("Admission job success.") + logger.Info("Admission pod success") return admission.PatchResponseFromRaw(req.Object.Raw, marshalledPod) } - // If we get here, err was nil and we have a Job! - err = a.EnsureGroupOnJob(job) - if err != nil { - logger.Error(err, "Issue adding PodGroup to job.") - return admission.Errored(http.StatusBadRequest, err) + set := &appsv1.StatefulSet{} + err = a.decoder.Decode(req, set) + if err == nil { + err = a.EnsureGroupStatefulSet(set) + if err != nil { + logger.Error(err, "Issue adding PodGroup to StatefulSet") + return admission.Errored(http.StatusBadRequest, err) + } + marshalledSet, err := json.Marshal(set) + if err != nil { + logger.Error(err, "Marshalling StatefulSet error") + return admission.Errored(http.StatusInternalServerError, err) + } + logger.Info("Admission StatefulSet success") + return admission.PatchResponseFromRaw(req.Object.Raw, marshalledSet) } - // Send the updated job to the events channel - //*a.events <- event.GenericEvent{Object: job} - logger.Info("Admission job success.") + d := &appsv1.Deployment{} + err = a.decoder.Decode(req, d) + if err == nil { + err = a.EnsureGroupDeployment(d) + if err != nil { + logger.Error(err, "Issue adding PodGroup to Deployment") + return admission.Errored(http.StatusBadRequest, err) + } + marshalledD, err := json.Marshal(d) + if err != nil { + logger.Error(err, "Marshalling Deployment error") + return admission.Errored(http.StatusInternalServerError, err) + } + logger.Info("Admission Deployment success") + return admission.PatchResponseFromRaw(req.Object.Raw, marshalledD) + } - marshalledJob, err := json.Marshal(job) - if err != nil { - logger.Error(err, "Marshalling job error.") - return admission.Errored(http.StatusInternalServerError, err) + rset := &appsv1.ReplicaSet{} + err = a.decoder.Decode(req, rset) + if err == nil { + err = a.EnsureGroupReplicaSet(rset) + if err != nil { + logger.Error(err, "Issue adding PodGroup to ReplicaSet") + return admission.Errored(http.StatusBadRequest, err) + } + marshalledSet, err := json.Marshal(rset) + if err != nil { + logger.Error(err, "Marshalling StatefulSet error") + return admission.Errored(http.StatusInternalServerError, err) + } + logger.Info("Admission StatefulSet success") + 
return admission.PatchResponseFromRaw(req.Object.Raw, marshalledSet) } - logger.Info("Admission job success.") - return admission.PatchResponseFromRaw(req.Object.Raw, marshalledJob) + // We should not get down here + return admission.Allowed("Object not known, this webhook does not validate beyond those.") + } // Default is the expected entrypoint for a webhook... // I don't remember if this is even called... func (a *fluenceWatcher) Default(ctx context.Context, obj runtime.Object) error { - job, ok := obj.(*batchv1.Job) - if !ok { - pod, ok := obj.(*corev1.Pod) - // This is adkin to an admission success - it's not a pod or job, so we don't care - // I don't think we should ever hit this case, actually - if !ok { - return nil - } - logger.Info(fmt.Sprintf("Pod %s is marked for fluence.", pod.Name)) + switch obj.(type) { + case *batchv1.Job: + job := obj.(*batchv1.Job) + return a.EnsureGroupOnJob(job) + + case *corev1.Pod: + pod := obj.(*corev1.Pod) return a.EnsureGroup(pod) + + case *appsv1.StatefulSet: + set := obj.(*appsv1.StatefulSet) + return a.EnsureGroupStatefulSet(set) + + case *appsv1.Deployment: + d := obj.(*appsv1.Deployment) + return a.EnsureGroupDeployment(d) + + case *appsv1.ReplicaSet: + set := obj.(*appsv1.ReplicaSet) + return a.EnsureGroupReplicaSet(set) + + default: + // no match } - logger.Info(fmt.Sprintf("Job %s is marked for fluence.", job.Name)) - return a.EnsureGroupOnJob(job) + return nil } // EnsureGroup adds pod group label and size if not present @@ -205,3 +250,88 @@ func (a *fluenceWatcher) EnsureGroupOnJob(job *batchv1.Job) error { job.Spec.Template.ObjectMeta.Labels[labels.PodGroupSizeLabel] = groupSize return nil } + +// EnsureGroupStatefulSet creates a PodGroup for a StatefulSet +func (a *fluenceWatcher) EnsureGroupStatefulSet(set *appsv1.StatefulSet) error { + + // StatefulSet requires on top level explicitly + if set.Labels == nil { + set.Labels = map[string]string{} + } + defaultName := fmt.Sprintf("fluence-group-%s-%s", set.Namespace, set.Name) + groupName, ok := set.Labels[labels.PodGroupLabel] + if !ok { + groupName = defaultName + } + set.Spec.Template.ObjectMeta.Labels[labels.PodGroupLabel] = groupName + + // Now do the same for the size, but the size is the size of the job + size := *set.Spec.Replicas + if size == int32(0) { + size = int32(1) + } + labelSize := fmt.Sprintf("%d", size) + groupSize, ok := set.Labels[labels.PodGroupSizeLabel] + if !ok { + groupSize = labelSize + } + set.Spec.Template.ObjectMeta.Labels[labels.PodGroupSizeLabel] = groupSize + return nil +} + +// EnsureGroupStatefulSet creates a PodGroup for a StatefulSet +func (a *fluenceWatcher) EnsureGroupReplicaSet(set *appsv1.ReplicaSet) error { + + // StatefulSet requires on top level explicitly + if set.Labels == nil { + set.Labels = map[string]string{} + } + defaultName := fmt.Sprintf("fluence-group-%s-%s", set.Namespace, set.Name) + groupName, ok := set.Labels[labels.PodGroupLabel] + if !ok { + groupName = defaultName + } + set.Spec.Template.ObjectMeta.Labels[labels.PodGroupLabel] = groupName + + // Now do the same for the size, but the size is the size of the job + size := *set.Spec.Replicas + if size == int32(0) { + size = int32(1) + } + labelSize := fmt.Sprintf("%d", size) + groupSize, ok := set.Labels[labels.PodGroupSizeLabel] + if !ok { + groupSize = labelSize + } + set.Spec.Template.ObjectMeta.Labels[labels.PodGroupSizeLabel] = groupSize + return nil +} + +// EnsureGroupDeployment creates a PodGroup for a Deployment +// This is redundant, can refactor later +func (a 
*fluenceWatcher) EnsureGroupDeployment(d *appsv1.Deployment) error { + + // StatefulSet requires on top level explicitly + if d.Labels == nil { + d.Labels = map[string]string{} + } + defaultName := fmt.Sprintf("fluence-group-%s-%s", d.Namespace, d.Name) + groupName, ok := d.Labels[labels.PodGroupLabel] + if !ok { + groupName = defaultName + } + d.Spec.Template.ObjectMeta.Labels[labels.PodGroupLabel] = groupName + + // Now do the same for the size, but the size is the size of the job + size := *d.Spec.Replicas + if size == int32(0) { + size = int32(1) + } + labelSize := fmt.Sprintf("%d", size) + groupSize, ok := d.Labels[labels.PodGroupSizeLabel] + if !ok { + groupSize = labelSize + } + d.Spec.Template.ObjectMeta.Labels[labels.PodGroupSizeLabel] = groupSize + return nil +} diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml index c639127..edbe7f0 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/mutating-webhook-configuration.yaml @@ -26,6 +26,7 @@ webhooks: - apiGroups: - "" - core + - apps - batch - scheduling.x-k8s.io apiVersions: @@ -36,6 +37,10 @@ webhooks: resources: - pods - jobs + - statefulsets + - deployments + - replicasets + # Can uncomment this if we want to mutate the pod groups after creation # - podgroups sideEffects: None diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go index fa4593c..ee267bd 100644 --- a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go +++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go @@ -123,7 +123,7 @@ func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c // TODO: Not clear what to do here. Arguably, we also want to check the label size // because (in the future) we can accept smaller sizes. But then we also need // to account for if the labels are different, do we take the smallest? - log.Info("PodGroup", "Status", fmt.Sprintf("WARNING: Pod group current MinMember %s does not match %d", pg.Spec.MinMember, size)) + log.Info("PodGroup", "Status", fmt.Sprintf("WARNING: Pod group current MinMember %d does not match %d", pg.Spec.MinMember, size)) } return r.updateStatus(ctx, pg, podList.Items) @@ -192,6 +192,8 @@ func (r *PodGroupReconciler) updateStatus( // Apply the patch to update, or delete if finished // TODO would be better if owner references took here, so delete on owner deletion + // TODO deletion is not currently handled for Deployment, ReplicaSet, StatefulSet + // as they are expected to persist. 
You can delete / lose and bring up again var err error if pg.Status.Phase == schedv1alpha1.PodGroupFinished || pg.Status.Phase == schedv1alpha1.PodGroupFailed { err = r.Delete(ctx, pg) diff --git a/sig-scheduler-plugins/pkg/fluence/events.go b/sig-scheduler-plugins/pkg/fluence/events.go index bc265f7..395517a 100644 --- a/sig-scheduler-plugins/pkg/fluence/events.go +++ b/sig-scheduler-plugins/pkg/fluence/events.go @@ -9,6 +9,7 @@ import ( klog "k8s.io/klog/v2" pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" + fgroup "sigs.k8s.io/scheduler-plugins/pkg/fluence/group" ) // Events are associated with inforers, typically on pods, e.g., @@ -69,7 +70,13 @@ func (f *Fluence) updatePod(oldObj, newObj interface{}) { // a pod is updated, get the group // TODO should we be checking group / size for old vs new? - groupName, _ := f.pgMgr.GetPodGroup(context.TODO(), oldPod) + groupName, pg := f.pgMgr.GetPodGroup(context.TODO(), oldPod) + + // If PodGroup is nil, still try to look up a faux name + if pg == nil { + pg = fgroup.CreateFakeGroup(oldPod) + groupName = pg.Name + } klog.Infof("[Fluence] Processing event for pod %s in group %s from %s to %s", newPod.Name, groupName, newPod.Status.Phase, oldPod.Status.Phase) @@ -119,7 +126,13 @@ func (f *Fluence) updatePod(oldObj, newObj interface{}) { func (f *Fluence) deletePod(podObj interface{}) { klog.Info("[Fluence] Delete Pod event handler") pod := podObj.(*v1.Pod) - groupName, _ := f.pgMgr.GetPodGroup(context.TODO(), pod) + groupName, pg := f.pgMgr.GetPodGroup(context.TODO(), pod) + + // If PodGroup is nil, still try to look up a faux name + if pg == nil { + pg = fgroup.CreateFakeGroup(pod) + groupName = pg.Name + } klog.Infof("[Fluence] Delete pod %s in group %s has status %s", pod.Status.Phase, pod.Name, groupName) switch pod.Status.Phase { diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 0e8ec21..8cdc066 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -165,30 +165,37 @@ func (f *Fluence) PreFilter( // groupName will be named according to the single pod namespace / pod if there wasn't // a user defined group. This is a size 1 group we handle equivalently. groupName, pg := f.pgMgr.GetPodGroup(ctx, pod) - klog.Infof("[Fluence] Pod %s is in group %s with minimum members %d", pod.Name, groupName, pg.Spec.MinMember) - // Has this podgroup been seen by fluence yet? If yes, we will have it in the cache - cache := fcore.GetFluenceCache(groupName) - klog.Infof("[Fluence] cache %s", cache) - - // Fluence has never seen this before, we need to schedule an allocation - // It also could have been seen, but was not able to get one. - if cache == nil { - klog.Infof("[Fluence] Does not have nodes for %s yet, asking Fluxion", groupName) - - // groupName is the namespaced name / - err := f.AskFlux(ctx, pod, pg, groupName) + // Not scheduled by fluence - we have no idea about groups or sizes, just ask for one + if pg == nil { + klog.Infof("[Fluence] Unknown request to schedule %s yet, asking Fluxion for one node", pod.Name) + pg = fgroup.CreateFakeGroup(pod) + err := f.AskFlux(ctx, pod, pg, pg.Name) if err != nil { klog.Infof("[Fluence] Fluxion returned an error %s, not schedulable", err.Error()) return nil, framework.NewStatus(framework.Unschedulable, err.Error()) } + } else { + klog.Infof("[Fluence] Pod %s is in group %s with minimum members %d", pod.Name, groupName, pg.Spec.MinMember) + + // Has this podgroup been seen by fluence yet? 
If yes, we will have it in the cache + cache := fcore.GetFluenceCache(groupName) + klog.Infof("[Fluence] cache %s", cache) + + // Fluence has never seen this before, we need to schedule an allocation + // It also could have been seen, but was not able to get one. + if cache == nil { + klog.Infof("[Fluence] Does not have nodes for %s yet, asking Fluxion", groupName) + + // groupName is the namespaced name / + err := f.AskFlux(ctx, pod, pg, groupName) + if err != nil { + klog.Infof("[Fluence] Fluxion returned an error %s, not schedulable", err.Error()) + return nil, framework.NewStatus(framework.Unschedulable, err.Error()) + } + } } - // We can only get here if an allocation is done (and there is no error above) - // The cache would only originally be nil if we didn't do that yet. It should - // always be defined (not nil) when we get here - cache = fcore.GetFluenceCache(groupName) - // This is the next node in the list nodename, err := fcore.GetNextNode(groupName) if err != nil { diff --git a/sig-scheduler-plugins/pkg/fluence/group/group.go b/sig-scheduler-plugins/pkg/fluence/group/group.go index 455b9e5..0ee0831 100644 --- a/sig-scheduler-plugins/pkg/fluence/group/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group/group.go @@ -1,6 +1,9 @@ package group import ( + "fmt" + + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" klog "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" @@ -8,11 +11,29 @@ import ( sched "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" ) +// CreateFakeGroup wraps an arbitrary pod in a fake group for fluence to schedule +// This happens only in PreFilter so we already sorted +func CreateFakeGroup(pod *corev1.Pod) *sched.PodGroup { + groupName := fmt.Sprintf("fluence-solo-%s-%s", pod.Namespace, pod.Name) + return &sched.PodGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: groupName, + Namespace: pod.Namespace, + }, + Spec: sched.PodGroupSpec{MinMember: int32(1)}, + } +} + // GetCreationTimestamp first tries the fluence group, then falls back to the initial attempt timestamp // This is the only update we have made to the upstream PodGroupManager, because we are expecting // a MicroTime and not a time.Time. func GetCreationTimestamp(groupName string, pg *sched.PodGroup, podInfo *framework.QueuedPodInfo) metav1.MicroTime { + // Don't try to get a time for a pod group that does not exist + if pg == nil { + return metav1.NewMicroTime(*podInfo.InitialAttemptTimestamp) + } + // IsZero is an indicator if this was actually set // If the group label was present and we have a group, this will be true if !pg.Status.ScheduleStartTime.IsZero() { From 0e472595f428876123fecc1e0d4c807f2b3edca6 Mon Sep 17 00:00:00 2001 From: vsoch Date: Mon, 19 Feb 2024 03:41:29 -0700 Subject: [PATCH 18/28] bug: the metav1.MicroTime was not being set Problem: I noticed in testing that the time only had granularity down to the second. Solution: It appears that when we do a create of the PodGroup from the reconciler watch, the metadata (beyond name and namespace) does not stick. I am not sure why, but the labels are still retrievable from the pods (via the mutating webhook) after. So instead, we need to get the size and creation timestamp at the first hit in reconcile, which (given how that works) should still somewhat honor the order. I did try adding the timestamp to a label but it got hairy really quickly (kept me up about 3 hours longer than I intended to!) 
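
To make the intent concrete, here is a minimal sketch of the idea (not the actual patch): it assumes this fork's PodGroup stores Status.ScheduleStartTime as a metav1.MicroTime, and the helper name stampScheduleStartTime is hypothetical, only meant to illustrate capturing the timestamp on the first reconcile.

```go
// Sketch only: stamp the group with a microsecond-granularity start time on the
// first reconcile. Field and type names are assumptions based on this fork's
// PodGroup (Status.ScheduleStartTime as metav1.MicroTime), not upstream's.
package controllers

import (
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	schedv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1"
)

// stampScheduleStartTime is a hypothetical helper: it records when a PodGroup
// is first reconciled, keeping microseconds so later sorting stays stable,
// and does nothing if a time was already recorded.
func stampScheduleStartTime(pg *schedv1alpha1.PodGroup) {
	if !pg.Status.ScheduleStartTime.IsZero() {
		return
	}
	pg.Status.ScheduleStartTime = metav1.NewMicroTime(time.Now())
}
```

The real change wires this capture into the reconciler's first hit for the group, as described above.
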
The good news now is that I see the microseconds in the Schedule Start Time, so we should be almost ready to test this on a GCP cluster. I also had lots of time waiting for the containers to rebuild so I made a diagram of how it is currently working. I have some concerns about the internal state of fluxion (my kind cluster stopped working after some hours and I do not know why) but we can address them later. We mostly need to see if there are jobs that are being forgotten, etc. Signed-off-by: vsoch --- README.md | 1 + docs/README.md | 24 ++++++ docs/images/fluence-design.png | Bin 0 -> 87714 bytes hack/quick-build.sh | 2 +- .../pkg/controllers/podgroup_controller.go | 75 +++++++++++++++--- .../pkg/fluence/labels/labels.go | 24 ++++++ 6 files changed, 112 insertions(+), 14 deletions(-) create mode 100644 docs/images/fluence-design.png diff --git a/README.md b/README.md index 8922078..ae420fd 100644 --- a/README.md +++ b/README.md @@ -525,6 +525,7 @@ kind create cluster --config ./kind-config.yaml - Try what [kueue does](https://github.com/kubernetes-sigs/kueue/blob/6d57813a52066dab412735deeeb60ebb0cdb8e8e/cmd/kueue/main.go#L146-L155) to not require cert-manager. - Try other strategies for setting owner references (so cleans up when owner deleted) - When that is done, add tests for deletion of pod group (the current method is not perfect and needs improvement) +- We really need to see the state of fluxion - I had this running for about 6 hours in kind, and at some point it just stopped working. I deleted and re-created the cluster and it was restored. It could be a development hiccup but would be good to know! #### Components diff --git a/docs/README.md b/docs/README.md index 155ffc8..c4718d6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,5 +1,29 @@ # Development Notes +## Design + +![images/fluence-design.png](images/fluence-design.png) + +The picture above shows the fluence custom scheduler, which uses the Flux Framework component "fluxion" Go bindings in a custom Kubernetes scheduler. In the above, we see two pods running in a Kubernetes cluster that are intended for scheduling. The fluence pod (beige) has the fluence-sidecar and the fluence-scheduler, 2 containers. The controller pod has the fluence controller (1 container). Generally speaking, the containers are responsible for the following: + +- **fluence-controller**: watches for incoming pods and abstractions with pods (e.g., job) to create corresponding pod groups with names, sizes, and timestamps +- **fluence-scheduler**: provides the expected scheduling plugin with functions to sort, pre-filter, etc. the queue of pods is essentially moving through here +- **fluence-sidecar**: the fluxion GRPC service that is queried by the fluence-scheduler to request an allocation for a pod group + +Both the controller and scheduler logic are bootstrapped from the same underlying kubernetes-sigs project, the scheduler-plugins, despite being in different pods (green). For steps, scheduling works as follows. Note that it is [much more complicated than this](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/), but we explain the high level details. + +1. A user submits a job to their cluster with kubectl after installing fluence with helm charts. +2. The mutating webhook provided by the fluence-controller intercepts the job and adds labels +3. The controller for PodGroup (an abstraction that holds a name, size, and time created to describe one or more pods) is watching for pod events +4. 
When a pod is created (it shows up as Pending or another phase in the cluster, and does not have to be scheduled yet) the controller starts to reconcile it
+5. The reconcile ensures that the PodGroup is created and updated with the correct metadata and statuses (and cleaned up when the time comes)
+6. As soon as the Pod is pending and the group exists, it starts going through the scheduling queue and hits the fluence-scheduler endpoints
+7. The fluence-scheduler uses the PodGroup name to associate each individual pod with a group and start time, allowing them to be sorted together
+8. They are sorted together, down to the microsecond, and created to run on the cluster
+9. When the top level abstraction cleans up and the PodGroup size is equal to the number of pods finished or failed, the PodGroup cleans up
+
+The result is (hopefully) a smooth and efficient scheduling experience. We are still working on it.
+
 ## Thinking

 > Updated February 15, 2024
diff --git a/docs/images/fluence-design.png b/docs/images/fluence-design.png
new file mode 100644
index 0000000000000000000000000000000000000000..c35d9fed86830e92f21fe4d3f4cee782958f18b4
GIT binary patch
literal 87714
zz!isLLfsE4ho^x+*nofw-m&l@qMo9`TZaiw+??&rvH_0SDEmu2Qyw0kur*;&53sxh zGNffVP))!OT+Z^jzjgWPkM!|1p6OCsU=)>B0nXJkVKp_iovkfS9{ZI~{x;ozetya{ zS%L?v60LrabZ&d=YmLv1`Jcctnx9RCEz92*1>EcZ_ltYGyDt=&9@4VGR35xL-9$kz z-t558r?jTbsnnE5?bt%L^;%0PFDrl!1E2FQoo7X4pQ zqSI=>Ty+GBa%=D$5b%kv-mW8=8?P`S4u{nVBk^iq;CKU8vI-!o*(@~SFkghMu6>*$ zlvk2?{!fGrS9=+C_#KVI4_~++y<)*w^UWSEK&CvQui5LG>xqER9qAqKOL}@Nhwm{s);ZbPq@<+j zf?o7mH{;HZ5O;GiU0Mv9=;1^VW=r!!f_YA7NtRdk0YbUTF4M>XA!g zTig7d1=8@n`NmrCh?lE3^`EnX6#n;34G@Q|t*wF4zXqZdJCmh>eRnd$AA1a7_g?oa zQ#kCHCX8ZPv#Baq+wc>cm?I*8U&Yi4bG)dCaY^C==l9kLAhX^9-IAyXATJORVg2qI z%1LR#OfWC5$iL{iiHqS|Qc}Xj#`a!KjfI{bm-(7BjmPUQA;ZUE&O`9|E9`JAXE41N{u>R_av-RXt@KV7rm<5!So{q`U~1eECe_tT6sW!{zc z6nl?-u;LVlP0KmK&pE`HKr>1BC*;z2AN&SZR|FYGFtDq(Q#${k*9daaivL zGhQO0h(7%uH(QkO{(p_D7;P9V5~$y_)epSjN?|%pVEns8M+~@=Y6g9WyvT#n9A2lJ z!&QZ$OoXZ;f`J@WJO;G{le>{U+Pad>ABLjh%-7uQ)OBQYqE^&Ff_rOpu9CZN@(+pm zaDdp$)bw<*PBSksth!yQ_fZL-D*wx}i;;(skdWTLe-D7kHlzVsVl$qN+f3#6B3Yfo zz!ewKFR->4PBRTUTSeZgl4a1bhCHyI405b_>|ion(fz^O~NJqrF< z@7a0dQX52o>U%Kb1Cn(B~kLzK2(k230A3R$W*M{8b9 zU!4a8hV#q7cz0hAG7tA1J+JPb-NJnmc;!$ao?%3>fw)QA5h=@rJI@3+{Bs|r@%{`$ zIg#7&+x3!_D5Tk|n1{pDUI2fjC{!d$(5*k7Q9ATcy}tV~opZ|RNQ$~*6fFN48a?=` z(IorL9~T}ewYO$E+=oV5U*W)%T9QhPu%%g4S6oOahGcV{#??oN4)2&hqzFS|&)hu3 z+6hMbd?P5|xECzaRv-{z>7$Y?Vyybg(4~kyxR(IKq%+N#k{#Px7n62b2P^36^M(K- z$^`C0gLBg7&z}#KENPM$Q&G6TxW8XwefA*t6x(gi>6^}G)O!ir2y_to28BMmz`F+~ zIXpRe09X+q{{XZ(-6iQKfdK)JwuStq>6!HZd{~i>KssvP z-G1Y}VyO3RJ>QdhvU*{rD@K`HYV2FmX@_Ku7Y&;5&+MLf674ch47^<66{OVvH0S;bA&Rh0 zOQx7b2#>?64Rsr}(h#BLzN8S*CW2HD1ZdmKRKC@|KIt~fLr0@Kqs^$ro`cpq#@~?P z(C}OsOytXP#gAc5eDz8iywb%ZPAy$RHhoDS$9on34^K#<{lp5*$Fl>H;t0M$@G83w{(978XItn+M?} z3ThSlf~tVY(ig*TG`&!(C5IciQhm+6;y8NR+0&DL1x+|>HWhHD%x3_IdJqj}T+HwM zELG#g%tm{s$@S+ig5<2*DjaCRx(-(S*IhLH)LpKRoroNuLMNoYKe#^dWn)2ZY_4;2 z%EM%ZtV}CO`XalL&nrd~BXLq<`g$jLlv^YW9#)>k?zHTvwWDn3;~?l*?3*@{xyStk+ z-S2tNx5xLJHO68+b3S)o;W=F!>%vO?XWhr5_}w!x`FM4ol?jG`d?3up9c!5cdHUG> z$@Nf{UA><=nO#3b=>nX$R<%8ehK}Bzf<5VSwJy*q_=EOMKa7}vW3Q}sYVs>kn+F9^ zcJVaCLwWm)=~g;aW&*P|h-s0KlQ=ltt9#?)Y2!AxiaR1mAMd<^U;?AeucP@ z7jX_l7QU-)`ABppCB^(cC-8fL>?-PsAP(J^Gl++?6S6(s+|H?Wk$m?@i`{nD!9RJB zS|A@^gHk}-$OWB%iE|Ov!BAEe#)K;&edfa+(TT{Z*1=X`M{j^xCgaupAv1rbp|cRG z{vRu7<1f*-mC(A@x0&RjE$uhfvt3^!8+#V;>7>(ikUjo*+^yaUjwD%teIu*y@sV9SyX*X=Q(i*5xzpvce)P?2oB{Xv7dnJ-Orx3C)4Gu}jtE>h z+QpX$V~9P`%h}r_a>mX_NRbsGJl5|4B+k$MDSxZU%ikSQDt{m)h`x9C4$yvcrWfR{ zS~=DAG$LaQ~ z><4pJIp!jR@*3ct97;Jq3V1|;BF3T#anZ(+wf9dT5pL082@OwDhk7G2OHG==Lbx^# zn13lzuZui#oc=+9KqBNee;T>=H$3J!CV|^)r~6+Z{;~gr|7p`j%XYheq_$&h?VRE& zOIBO-9^6hiHr(H?Zu^;-xL(u zA5VCJGzzd~LP+=u^9%~#BuP3_R_jEVO5^Y1-1t<{_7e8gMM%GJ4#xE3$XcC>(Q=QE zM?MjdMI(+u*mdL!mBBq=xed^ZV4#y@XVxz^{Lv*9McFIWwqo1}O>2W~p%|A}`$lq6 zAjjg*b_JLFG8xehG2cZll)KiSg`oeN_Lel06%*6@2qGK=@GoaDU!INR@L5nGz*m*S zoU)K(oh5Ub^vQZBTKniPUbZoGh<}N|7r^47xn%zizi*1_2NSxUil$fFqlG}i1ML!% zL$zDC-b8j>ahrNtIV4nx*}asN9r~6SQ)m~`vgnK+cETFlyI?CV%D5pTx0Q%FTEw@S zL8~IF1Y*{t$bANA-e5_vx(IyXCUunNmm!J@S@fskltD&5sS4IU6#Q9R6ax z;1|rPjnIpr0z$O6)Xpq?@Uza#68;bc41{Ypx-aUZSp->Ulj@QhghvW7yh*e;P|Q}P za?X^lqTZLG|NF)J<-UM)pe|iJ=mDrJc3_gi8>7Un_&#^1IpVVjNy$DiPi(ncOD4{~ z%P}#1j{Opy&g@L_=t)CK>2`yDwCZvi5CM6`vgt*pEi6m$JB(1^&4h>RxG5n- zvKzNbp=U@_Dn11TBQ_2W^%GZ0ZpD~9eo=eB^Lg+8jIRDwI9Eo0g<>S7rJv6GNdHwe zv9XjIH{`;v{I2f3$ZQB>z`0A<^^$-F0Tag~i6*@F%ULAT1_cDMFv2?5w&%2E$nDof zR*aT2V+4|EQTb0?g%B2|H8>Mxfrqb+eBBw)FlY#e29FfZ)(BmZnGjcP7M|8T5ZYg$ zpJOP9IDf={h87NZu!Gd6f`K5ACpl;)3Z|6XI!;C3=%d!fC{sqy9FR8N+sAA7Y8Fi0 zE8&Fp#$<_mG$vheK;-d}P7vviyn|@99x?q=Sd9Dztsi#qXjzIFHGyFXOh?D;<|H=a z1Zq89t>0uaICGy33Hvs{Xfv{V+{r-g2j!rEt11NL842;;3v?opUPnTBON-Mxj*;a> 
zzvZCV-bPW7XOOMgeufr`iJITyKYe-3`0)F*?UD5KbN3`ADaur!5BnC@sr;W&a!)}@ zp~G7msNl7fu@>HIcS!GW9*HF^?|yg}1dybKx~%bzJkJzO;P5%YEOT$J61owYmF?$< zPm3Vjm_ev<;Zl)8A1IV@Im^Qd7Dz8ROFoc`FheAeS2yutq24pYB0khCdiJ-!J%Zbx z%iJ8>0!e%p5#nlGvs?LV5EGJKnNBa1qtu(9*7mQ*xK(m^^}IOD^X;M3m^FnAR841QP^3VGUt%CMp8u7}C^EuIuF2;d z$c5WE1C%(QG#(H7ak4MsY*3j4w@8o{14_H@c$&?a%*yARPF}BZcaK`=nLhHbB<&Co zX`JGwwe1tFC z-UXp4Xi0YY>h>>nLuZ{a>!@9dBI^Ab7L~XCCzD%%kwU*50%P2usZaBSU~+d(h6fGC zz@2V3m$_!aFlY?}s`?Gn`W_eJu*`zE7wfZ7Nu*m*vQTVA2st}%!d30PAid$d*3FkhVBkkzf$BdLSxsHgtpsa;6y}wbT7W-lCkSi7v#3 zN8B$jm%EqdGEgtd_~Nn%hkYb_^!W;jA9IS4{5MUF9%9Ui>wJsQbF07k`Ofcim(u-ciBlCYP|BWgIKai%e2zkBwTouKmx_P<97fTX_@LXHzIQ}+zB zy9yb`VZMYAaiDw3ZF~G7xxg^DDaR9<+}`~9eCt^tyP&ZIdw*B=$5!Pnxs(1 z4Fg*X7@mDGWUE~Q@vEuBfp)@e&HU%5XlE3VWeF|yV>nIzr+zwHL3@_u-da{BEjlxl zWKtI&CRQ=tVA~Gjjo;cbK|Z1qXV%)kgl@@i5I0ls9f7MidoerS8tK@lORI zK)%zxQ@ZNYh6@_kR(tKvVT0Sj-MYLNNb@g^FO1SFCVC$o?=ms`u66L78gc!5#U4_3 zvq1SOyof*n&%01YqnuXFk%0y}_45U&u}ccqEls3vfEe9X$XdwvQf^RT}d zAn#63eIp2wwUbN| zzeCYy5tujJZVR0iMGT4@+eFC0^VdxOdKxM;;I zh0Z$9)G>(J@;-JN$1TjTl+3R{5H}NAE*BE3iybPBbl$EBH!X2dz0!I1N`g94&~wUs z?K^9Z118F?584<+EoKY7KVg;eiz^IoaQ z|0@6;_3j=9I-V@vHPW*8eK-!3aZTc*fk{*AaX`A))dvm^OJp$-;!S@f*PA`d_Qe36 zoK?L4J3Rx9u3@DLP~ZL2NIDLN=qM}7($b;URKa`pO`9v1-@n}_5<-uum!$F97ru&H zV~&5Z<~#gLraTIPPpAfcdEP|=VKML_6pXD@wCXsMUPq^>!XS;U3Z(R@0p|(+LK!lv)9#1HP3-fQg0~tOotSu>yslXK}`5C4zDDk;ZS3-S=-i&Ei z2YHy5FdA2}*t@vgZIj~Q#3v=GYH6WH;0chUaL>$`?Om`Xm$30!7RnvxM*V-O2_<{n zn6WYof_6_;Em&x=m+;N7We6vCBhr{O4BVO06AJHuKili-VHT7X>)Ql$L5?lb72?l3 z&_^e$C+$YnxCrVHy7FNVs-(T0X+H8X8X+iacWWaMg*&>WNKc6OH;zW+8L74Ysc%*& z>G6{wG94$_dzJ+ZX9KVT9n&@fts}=_-Qitw7fZNm7Jii1+d-w`5=84Fc&B&leL&U` z7ZT~ewUPB6bVOn1`UJa@H+{P zvQI>d7XLFwr#ciGfg1B78Z?F7lcqu@lvjwu1#tbNT^&QexG%nr;)Gwgpzk1C??b*P zTPuLtij3%5;lN`;|3ECgGW^EpFnH~G%7<+$QuK_gU%kdGr+H1u|LJL&kpJ%ftR#!E zarB)Ai_2lZyvaCdQ{h-Q<8%uSXFaa#c$vIM^Yb* zV5x3sWO#Uv&DNhE{c^+|Z-ZJh&r`j<&1(ob+d?GLUPa|b>^+w%fnpNFCUtx3(G*!nv= zvqT+hiFs!RfAGeAU8s?3)Xp=^UJL`yciO%GJJ`g}EG{m-zPkFXiV#P)CnKVng3KkB zvl#sU`;c(G)oMK0v3Q6xeH)92jjd^?KSv}cSPlT|)EAcjy;KhnC_qp5Xgnuy4;h8u z{&c15o~+UqpJ2J}zduHW1MwW+HwJc3+1<86>a-F?n}Jd3;}#SD{ku1y064LdsHv$* zuP@wU*ipmx-M`*aojo`HNM9IkP%l2|Kt7;#Pksg0AQRDiHV8bZjSEpn->TO z!Lm&g#HUkKN{#z6NzvTg-%KrvPPH3^o{i}#mQ!|u2Fb;KfbpK--8%zTy54PlTWSCBJ`%)3`(Ymy;s>pe3FC7LJhTh_L}wOh*`Bk!hR>|oFah&oN5MBe^xq@003xgvXm^*zQJ^6YDWW1v{QN!M)R zLz=kvOSGq2ug-RLNO^TJebvcZO_WAFGbcLpFt;P^Ipsb*o=FDAr=X!oP3-b+2@!$Y((x5XUFTqj-aMZ}{cM4zy zzZM$)M+zJVEFc_S)dw4fcj9pj;Cj08=Hd6u&%dv(30S8Tl*CEN4e6O4$}#YJHkGU) z*6J>@K(R?KDc;Wfot9A)?a}EU^Ls>#U12D7(e$AEFxGEfwoI}JNc?>fq%l{>O%8y3 zTwB}P>H&^GRdqfDm-$WvOFzZK!y_gp!e#X=*A~ZeJ0l&6Xbcehx>yHD$~oPipCo>G62zvYJZ{{F zTpEoQT`9!JL`};paH<*;krMXx8zK{$W|rj(xNf~ivO}_xD+h|CMsgV|t#(cm-9bER zY7~KJ8SEZS)4_3;Ol2;OjpbS`TNT`s`2gUn`)5KlB!Y-1)p{RutZt2n<8|s9x+BwU zY8TimhT$Ml2yr%RE9)z-=)c9Os=DL?s>n$$sVz`9rG&!?+ke>2Tp-8rOG3v3MO+!2 zUgnnuhPi)rNtc!NH@O}E4w|IJ%=D_XULeN%WU=_~ZNkKArI-zH4qdFbA|xa%$jj>y z>nkiO9F&w8k{EhVcSXSdW2^W2FmMkoe_#Mok^cJI)Z};?pH1|3)wVb)DG9eZm!@nu zHHAn*++j{(ZgJl8ZSKI0c*G|fc^NGwS&6KK{H6zMXg@E*kt)^Zk;Nfide%$b?XKhl z)6OxI_46$GjQ@8l#M;YrmY56;cWix9RPT>cKRJ4m6YeFccZ>{gYYePwwfA}|o@>SM z&Q5Tqh29NpI46jrx~jL%h3d1s4*VVV-ZM^POusN&$|vSaN{fR@VjikK(UkMjikIN8 zy#uc+74^Hy+NLI+j~^?uv$ON_d%KoJL{$y?>W-r@FbIDCww-_x>Y|YmQ>x8iv93rB z4JDGVeXBn>roZh2s;Ve^NI$cyqt;m{w1MAvW`r<68Jf75++LB+adO5_2JYv{PDb}sK~`-{lB++ z9QazI(p}kgo4tMo*+u#v;xBf{6P8zpr20vx`IQ+`ej8nR{gzpM^%fQD93+nr$iV^5>Ng!-QgMT3I@}fJMSFEYJkVhjsw*Na^wJ zKjOQIBeL9gz_GS-5WU&_X809@mm(picRSyPdzkMg%&c3@0b(qvG1Z7Kpe@SHpf@W< zXf#cl_+h?byXv4IqgayViMd;vf6b_3NZl<&JfoI|Qkmq?@Z_nqs4FABk8AkB)M5xE 
zpN|-0rv3MqIpSeT2YGmeScDijWkqGO-R5K1aC-;i+s0d0c0+1nV!~QlGNJ|zogZTS zdo7umL}C5Q?}v5~(D40TX+Sh@W&TMun#5{!nl3W+?g|$ey6%CX*iWwmO|{CXJF_(C zsWkGQ;hj!$+~@%P))sCRBI0}_thvx`abs#Kl&lid8QU(nf~=Jv+mm^rgdWIc&1*NQJ57O{%kKh`EZLGtc z68XXAKcvRu`me)O?f|e9>y!A;PX3ptVrPR(THNsZN~`eE&x?lGf#KcUUF&k(N|F`* zwlXrq_uecjc3kQoo-;5P1DHB`Wb8p@wz4b|SdEzm9Kqh9iaKTrlJN49^G-0!l zDFtu8ZH-g^=n#cb&SBv5i(+P*EkK4GTIja91B^s3QkYL}QQlrW2QboY)Be*C-NneD zKON~qBsAKX`bgEijzofPbf!fL$VkZ|41JKu9zi{=-8)3TzCc=152#qy9yaE_E$r>5 zGAt?A){E0Liqkf#w5v*R`*{vYaA2fgHvqAx24ftHR|wwv&XshPLRqzdR0ySSdsNJe z4_CwWzC<$O3T8LMs#+xJx2MbHZhmK>m^e^u$sahImjE6WRqoOTvxyy`OoO$RK7hj? zAUe6Ug-R&b)XTGJM6<;7Awsia8L-|>g;FAEi0s-aHBI;fYxk_6aw(^=7IUtxROYBF zf%;WKTx65zlU{1qre1ZG>b*G}ug4xLbJa+FKB}5}%WJE|PnQ}|kYm(T9UM4=L&U>~ zXqoBzaulPpdWN?C5F@hPL%KdYnJ;Q-Dc$V+C0s`m_E-i7-wwoM*|DDgd2R=vCVVK{ zu?;S-40@KAOoZsmn+*~Q59F+7z2pkzyJ^MjHCt(Nh_K8Jq|&H4k6?3%V6lrxw5I&5 zTMAE_*{*$*Ti0XN;dttK$Zu@6#9sFRN0nP~q2h_F;Kkwe?(nIid*JFb9&3ie<;y2$ z3xK{UjK_I}5nESKN@K@k>op(c-nu1MOrL*R>K|o8@T`zM3Yz|Wt`A-_5(Z=&9e&Xd!1!1`iK98Y@Ku^;A zui9Av$#WVCu^bEnbye{oUdtIWM=t4-zTu9I)hd}U=gE=|wOzxlRHAPjy;W`?O?Pvk zc%#4AJgk4Q#m!EP4Vn}#eS2aK?nDcd7D7|1E6>Mc4b$E@B3VRMyD1XVDp_3Cz)s#4 zlGR~`){$T%8^MO6tVEEVrfpqco?L8gtNi?cjRqlbrn)&QJ0?X z^UZx*+tfimFcyurb zMU^D>!zEJE3a)}@Eg4QD^lL-+{1OH7gS%FG+8=UXv7ZkmMPxU$x#?2>5Rs;iQEm}@ zFSqAfyo(?8dJnEh3zxkjN4$j>np_`5;0Otskx{sBkM65E9jt&RZ%J0RlHBN4x-V4O zDu<|~q^w%-Pvzv;w|J(5pRn+`T?jNKD(i!Tlgm!L$>Au(!jeAdUR=>m8p>suaOuq~ znXsZ1NTU zLktW!Oo>*erd2kZ0f6eGD9|8xBEsaZAb+g+FH3_%2k#%}JV;GvN2%g@Fl@VT(cSW= zF5YFua4#+PoXQ9yB_*X%)(`qqZl2=shAj}PxTdLgE&4^CigU6!!nA~}?zKHNNJ-P# zYiUMlx`yGq`*#{p)8tX5MnpKEg)HH|Z5n?(s=UOQvWxQ6ir**2KE}k{#~|6pwpkam z{EfaU;{VCgRO3SQw7>9KIE%1l^n?pPIx6am|Ha_ORbp<4xNnye>`x)9ynBs8n}rq`DSKodvX>(k7zfD}7yOZ;YMI_Bx2Q z8a0imrK|XqC%gU>hx^I=gzY-h%f>R9%+$z$YTh|*di8|1cH^#mXJl+~MIr0I4#-T@ zAFeCF^oG>--Zkmk`bnk5r4p6D7Nro9EOgA| zuq}(NWUxoA16&vZuffPuPrO;3WsTj!=9XJO>l`iF#Pt!?ZH0b$zkNoc`Kn63Hqm6n z?LJ}OMd>Rg7D~8*bDzG8SYES#J_%Zz{wb-=*z?s%4#{ENiDrX-PvW`4)O78Z31asW zhZRIO-cQJ67_%Q@sj>wT$sb&T33% zEgLU|P<=Ki`HqUYm%fONq!A9MehPn`ZmA(U=}H+^F&x<+ZlPO|T_(NC*+AaC#CR$) zXq+zVOu+nk&_XvV*_wP7BKrJ57&%9b_st~+BR=i1h2c_KBXvLMtcS_SfH1$Z^Hu%G znf(0Swf!2dK@!&OgQwYn&`d1g$GvYD%sBH@O0NC;+LE`JRO-3w2>&y*`trFA@&q!c z^&K-`qiXxPuEs@AQlPA(_Ri9L$xH59;|=5#tLOiHYt+3+tzboAbMl6kKQTS_+CFWl z2-pyvI+vkQW|tk=+z=L8WtIy%Pn}`3Ko~>GxeOyB%&)XQAmee_Ka|`LDOthM)$;Cm z&GB|~zaL?w9Ju_{BxhbHN7O}=zD+!O6Ta_qnYZ^hTe8NQB-CMvBp!rLA=o;yHo zgG}Z&dN#Fbv=QN&v8Z?2<~n(zDn<ussN;7S#FGX}*~^(~L@dgLE0%v_UMjR%OLE$;?DpIGZA5M8ek z3(aj85>Eep7Ehm)=n{~E)HxFeG)~&{Li_SSdnpUWyNks-=90R#-h9w`+uRe5b(5CM zRP;4DgQFx%R+$xENKt5|z#w#OcuDQu8%kdOK)blOys8%%)q;>DzwvRjQ5a~f*Vfex zwC+eQQqsp{Td_9nI0Kob!@h_QQ5r4`5iw9#d_NNEUmWGie<9tLW2Jg9(y)JIUzdC^ z(D5*(A!UPN`#rCyu3W!jNUWJPy93>adarGbCc{I7zG-_>l{O@?yuR5xn#gN9C=FCz zy|bgEy(A=FsQy0wn^uY7n-pnD1`iS}Eck(u2A5u0<8{7Bdfx~Yl-Of*|Dw*SO8Q}A z(y_V~DmS?dSJ`?DgaL^V65H)gk2Csnq-Ia0M%RQR37STgW#5+9&Mhg6Mx_&}iHVTw z4|6`L8JgU@>Dp^C+4M%8o?aA3*)HmlniYp`e!joFB*y8r&ygX&n*!3(Gz|>$&OA(J zT>;dTQBIpt3>46-_>fK5VGzsuxHy&3TrfhbOosWGgy<|<@vOnYbIP{AhS8dKlpMvx zvbP2wL^Hj#Ps}$xv>I78YMisP+Hx?KquBzoKVro2N>h&BPid6 z`q6N)0(Fb!`qj&pWt-P=1(d$g4+L!HHK}}>>9Ps&ITDgs`jSA1cyelnkWFP(=WcDC z;yMs@lK6wXs7&07l&}~MWzJMVME*+0xWcZACGVd7LpGYuRdXbAbo%PYkE8ms(44b0 z2mAPH_dOpwuJ?$B&O!|C`dA`hX-eiD>?cb6n;yCziV8d580hC3Z0m1hzjXx!bU9U3 zRnc0xcF!s{Zi%9H?~6Qx-0iLHIPgic!$JC+`^7Q+z?=;AIqVFs-nuTP-vr5Sw98z&fKFlxtnCUvU_!#3X9nK)`u(4K%X*?(nlswKbJf! 
zyUfI-+MqsWR;xbKgQOFl&+oQ5qs|UxZhy^0&&{&|WWK`n1Qmfb<@ncrxW z94(@szU7VKN{Jmq%I9^tMRo6Li92d2!C%t(PE*ri@EU!8KYW{<$r;Da)z^2s-sg1g z-b(`f(pd}5=Et*mzY*T!Wk>GRImhlE`yMq1V*Zs0G8QmcZlN_bF`*sMYJD_5L`$)3 z8J^kRnwUaCpq(<(yQ(-S~84KOu|9RVbN=y`7J0HaRz<*(W#D_mA)yN0+t(r}+P1$&2- z)#kOGHVzt>RXmSVMEdz7pm+Dugv$pDq99m^`KtxG#Ubqa&Z z1>(KnwT94*Kossv{T(sCm)QY&6#8p&6-!!VMuGwA;`VZ_&joJ^_ndQDtL7c)<;<%I z2>qAECG-=O`-2M>~6{g7R0ujAw6kMeT$Ke!HeWQpi+=|g5-2ZYY)yG^n0{W)KA ze)QgZ7SMg<8zDYJ z`7}g~yvx$HKw^6I3_1gA7f#D12uV0&&!@X{Fq8H!SdU)Enuvs^Ov}hXb65-PR<8QM z@sElGC@sK(Mdk*SQk1Bbl{bfoKQ_4iQ!an|p67-R4ARW}^psE?C<}PwElXI>?2VQW z?R3Z2c25%5Mn^HTdRMCZ8cQ0_L)i3rnOV1uVRU48Q;gLvQ&!RB=H!fim9Z?M3gYuk zFuG?9nhs||Tz2Jnn%t-ktl4bytiXq_GS?jKmrosN9Vgv2Kb2qom;ZA#c0p(o4pu8G z%}!2(u|X_VEZ-(3F6bnPWe5 z>YLq6`3rcNPaKV=@MEkPcNjx8c7pu6@lijt4NUM=QHV7+w~L*4ABhC94)LRG*~o04 zA#tmx)eQ-lQ$Coj?-d2)t_P+67q zFxt{aXFAb+EqQ%fo)8(?k>T#~N?q&Hi4C|XsmODMB_%M%JKBcN-&y5^CgjyrR8%xH zkZYD-PM{V4yH`{+K>nd1%v8|yju7wG6t#YfpgG^yru^rdpq;Ji^w_py4KV6>K{4#u zW*bnvwt9K4m@>(KHp$Y75bwc%^!s$*|M-YlpSF%)AFMhXtOB|3a?Ze%dMGA*qhIjg zf5wx<^QDYSDVnAwW#IV`KVP;0W*@(0c$>{ZFBXmG!1jrB=#ZSmormK=)g*_&^ABhU z4c{WjIa#hQw-`tX#(92g@n;u{NU7+Y(Jhzq_Wz4&AVUny{RqPHOY+O6Kn^uzK<8~n0w z3CEGVl44x+LyT;}-y~LEZtkSeah=Y22neOG5}$>?Lt7dhz3Fxb=|6Kr-Wp%#~huR?CMp89cHx652Vxdaw#weC~Q6O#4g3R(DTR zJwHan8{zSeojEz`GV`YmT^MbBUE9A|kHq8CPhWZnN6@oFKuW2%A{o5QOpyz0V|u;D z%Hff@Y0K|1LzR=LxxuWqzpS%g{A0p@XGQEU0w!x+57&dg2U)5n6kgMFi;A*-=joZ& z+RQ%?t3d}!4ghlkDY_e_3DRsqw;RD?W!2-k%IC385kSx^vn-b(P^+@t5`rBRN$5nD76kU?f9W`F& zH*Hu(VzP;{pS7>PrJB_pMLwF`p4BXu6&NzqjE$3)jWuV)BI~r2Bqt9Rj#KOs;4{^0 z)LAYsg#F}DDXn5jHe$2gdZValKp>+erX;56)N$o61=)$Krgr;PiI`8N-N{OnN7J)6 z6d#xXcZ?mwWlFFF>;k7&FJNf{wuYV<`nFuDS6BAbG53J_6rF&a(J`iN=2PY*ik{EWXrr?}h~ zs{&KY8#aC{1J8|M3&Wn^oeXGYKg^oNMMYsD?9Zcnl9Kj)d?1q-cm@g7ekU&kx4K0T z^SXAy5zb;+l4oT}NJwa)*5~GKouoCR(UZPBp6cl^3sqiRR4px~f&{|Dvmlqop_+wD z{6S5|X>2J^3AG?Q;?7?>!$>R#`i1VVYR$y8`;TV=$I6y^=*?9(1Nq1-m!av8X;9g{ zGHD_O^zYx(GFDVLp0%%eGTGcyt3PAtkq0{S%FFbOS%;^km1bmU>+9>Af~lPBNEjj2 zw!t z>%EcE_wSS%sQ9~N?(GYi5Xm7SA?s1C&gY-bg2C=$4PNB+aLUvgG7+|Lmd6?1Osk*XwFeI9$Z%8)zo^6;vLPpN}yX3rQ;PH8TW zt7EnO_7T3X=OV!luH2e;GLdTsDPhy0-u0%+F>M#Uf7Nrlk18C2Hh}>D<@_V)M3SYz z&iUoVb74UZtfXUTF5zHN1Lxe*7fz36NYZS6UEChTSepl2y3x zWR@3i&oS4K!OL_Z5Yl7qz|UcLgr~~w($h8=gCYYCoAd@DiA@hRjJhDVq|WwHH{zH5 zDi>;_O+9C$T`)R---W>DJjr@d#}Z=e<8D6K88Z>Xun%C zxxow1Rg{3Vd%{l7DdA2FDTHKHo4D3gi2KM6nAe}%=|-7aMijxMrYeQAWQQy0{0)nX|FdP$byJ4Gqw*EzugCsV)P=RAkDy8TyM5qgiC>3*ZbLd(y|S@Um0 z%knE^dj-Q5i( zB}aZ~HQb88<@4>^4rchQq4V3@Q{er{XW4*ly$T~m{=7F%(Q)$c@FP;rJ_7NnW{I)` zvB_9s-GvD_Z=j*kysozJtYGGgktupxjM!UB$dIY;J^AO;%rar9IA@SDf)!3RV(?t5 zJJ@vVu=Vi@+#6l$@trxyuWTJVa9^^dfNk;Hx0oF$u}4@5SJf`hsW!I;BY~qoA;ax1 zRYpE48!o~_s?+2a5Qh(-Yk2873BBQl6FRPtvgvYkHL<$+)#i?Y>QLu=0jZ`d;{i^S zX1;rWsx4Fgz;w)HKLK*yC;b>ogj7Qgo{NKzpkEu->daBj^KIW{)o|uqVvZ5T$y;_) z88m<E)~KY?VAmJ!>G9Z4f0I<(e=wJ%a+n71daDp0AH%89GQmjczciClFM$Y~Nlp{L zjsBL?eCe%fa!L5h8nLXWCAkE3F~^P4bYxUq8jDGGM1ql_*h8&Z_aNp!wrYv+pKXx! z8MwLU`;9G6S0o=E{KP`>^2^G=?RrN~&uYEqJ(O^WyNzx+KVuJCL$lekue;HZnE)mP zkTPCt9X=ZyKeFfn2W||Fgzv7Td12=#1mg*HBVvQyS$a)82Lu~?^*qY##7Tx47FdTd zlojbbsSmz?uMb{Bh94VA&SsG@qZ__RbB+E-zAfhE{dl%g$Eysf3C$UsZ!7eYmTHKY z*cv_j6eAN1Hc1D>wN&X7eijcTahFhhrPaHEH?eSZ3i;I<`FF*WI$e9$$c`p+B{}#g z*teL8JPNwb)ztioix3xyoAYu4M%i0y%l$x6RY^>2<5Qqem`r!N?>>G4D+`ttu4d_a zsY^m^{Mjv}7YO5ZKDxK%1u<4sO@ZPL+Dd4Lf{YHKGQsdiWpo2)Zz3iZH+1)1n_A8g zua(?~|8uq@f8gNYOpObnnSl~QD^N=U>vPaQ16&z3H8lVZQEF3QxgQ@@!xT6xsH-a} zHGyci(u(ayUtBilh>qtvSkh?A*65ksqQ@Sx6982mG2q=yU|A^OGe=K!QU0bPq8`$H%DI6x+4_Mw1r*&Ie+d2Pje! 
zLO#%cgtBLC3Q&Ph{`z&8j^W2PPSpkC{A`HIt#dI~0=7n_t>9!-J$Ky#-0fCz5Smod z01?xDFga_49WATfLDmCRcIQO#l7^mw5_~V(ipMv%>Od)HX}UnHw4SPSfxhy~O-%7M zLoiPq(j0|&yjeYP;o-<{dRH8PJ2+epx3Xck!g=v$_3@dVv+?n>mbCyW_i^TH?DI>! zr4gA21LcpI;jGHo%d>-eXH6r$BSbSlnLA%Fd#tq7ih9igPpmO^R_SeI)<>^XSUC`EF(+IYr-4 z|3pAo%8yf0-BVn{4~A7Gsi{7l^4fx4I4KUtYF}ga?=3uP1_*Wb8XD%JD8xI^g3dSl zs3T}dpply?MijCzBuI$S{!(Ixn-ax+ z-ofkrGX9h}6E3tJmacc~uF<7eZp}@rVR>I$A|r4LK`<8I)*IKhaII`tun+&-#ncle zJaC5pmXPAj7;`WV5H1sshypMXdBZ!BtLpyy zDLIsK%ui6Rk?Pw}XKAkNJ_IlqK>H@=^V>|lA3uDfzcJGmwQp^kDCA40bJ;+Ji5*TB zL=+Uz0NDn&Tg#Zv*tebI-ChT#t1SbA!+89r4Mb^e){V78Ne#5G!!cRo-yPtI&%@4w z^v#vW!6dyvp@ zcMlH&prLm~8;Cn9oLe6G7w3Rx7|=2Vq8M}oI{Gv{lv=}^xo4abKGN-ld!+GPjvRJl z3r9(|+bl_F$eL=ylB4T8Z>=rg8oG#z6L4%g%RHG6`o!`8rgE+F4-aFxSR|$?#hm${bgcY`BuIArK?R(+>&F~@6obs{x?RN zOp6g{2)^yL8_rQ-OEYRYHI#|jB9nEBqn)y(N8fg-jDNR(#Y@S?Yt*0nW1ur9r}i1U zW$|ytIOqG))!{z|adEdPtM9tm-RjKxcHCbdYFDPlkTYVp-O!Gl=VaVG!=b|5KLBob z5+8R!XQqjnSx>h#8z(2McCLKCr>7?_aw9(@*M8O9wPi!V$nMmnYv1(Tpx*Hlmd8TGSt z)y(+Q3YziXzP`Y}hV=AEp`plJ+4W_SK*|OPW>8;yFB|oN6bvL5Jl6lf77f+f?|%bg zgP@53-Vlz{qF)T#4m-Wl{qA!b>wX&+A1?jqx?74IR{S~TJ3?Z$ zw4L5^RZf9R_me4Hy(#OtKad6&P4lJoMJHxYxGOF2(6D5lhsmf_`{v~8_Ac4mh7$>O2PVUtDDH$HZlmbSd~`S{$w9XO1P;p{%C%ySP{+E3zZY!@XsCS}{rbA(|@WbU7xy zed7`eRqNtM><#ODr@`>hkeRtTOqZpyCd?Ec7;9c1&gL1mJzfa;#9zBh`#2(|tyWg_ z`9VF;WD4EtjIbXg3}K?!2UPq?(2uo%acgt8Jw1xHZOpX4AEQdgH-INZwO=~Zm3lrc zAVdvfYsxB;+gZr(rJx}wj*QUxJaROe>6XNb;U-aJ+Nf679IVUB&YYIsx%6J*L~07We>A7IZwJa zg1zE0z(%-8N;0IAd1^Tt?kgcZzSzTgfVv+5#vyyGOiWxZwt}w0dB$GeP<>Ixgx*#z z)4=Dkf8!^-d(IVtThQFtSfwKE<>fV}Ohf32^Y76OC$V;G5OoLD1ijWnEYV7|I#$Z? zaSs$J6pT1HpAX_9Al8)Z>LFhBAs}9<`+=LyJzIkd^UyqA-is4xSh`RHu3a{J^2hmn zh07oDmKsfmW2HNTWZ zLNm8{$EUlm&>|wD*kMUY^f~!dmv8TpnXC@7pIQd}y4f7FZ!s(}TAP~-e>?sOp<=t# z&ddwv{fM8ckimn4hkc+&K(t5OiP%{vZLV`+R4c=d`5UJ3gPY&0I~!YM2V1URv(@l$ z#=_y+42`7K@nqT!zP&C}h=LUAeR)I#XPDUa4TmfQF_{#|XQnZWv0EzA4PDn|5SN~o|;;4 zdU{#*JFCfjnKa|uU8p5!pd$vqbq8dQz=briu!vjo0r$-dDprWQgr6cA|7>K}b978p zKGT5z{g}3de5M;mgc@qPKvQ!~lj(T!^`&w-^O8!X(Q7!bky&R-!l@PA%B=nR@Orqq z2g<(-^;7WW4mvgZ6FChP)ztJf=tDX#GKI_F(MVdX*Z(v>5L+!bVUu8xVB*BbrR*N; zh#8I1>ccW6#3Uw;ol_B8!K%s1rgv?g2ZV)upNNnGJ z^Tz(B$s!r9SD)^$nTGz(Lb9+{PLlb|-p8EV<3*%4)bv{>11qdUGIyY2CVtVf_0iL_ z2w&<68p)Q)?Ys^(1 zPV>kP-_}dqe))ckKb?2?(9?{o>aafwo>6>e4^ zXkMP^OzQk0EeEkI2m|G@JCf$4`f@n_j``+f1$fgyuO6-gauWt&m&}mqOql=fZfh8 z2wJik0w_=@RPrm!xICUG?kiBl7=$-}XFC&U#`@KL%GnU@%Xhv?EnnMRJ88@1pJRhi z?IQRF8)%{zX1|N*%(Plgq(Qo+yFsM8Ti~XS( zkMFnl`|Q1btbH8Ye|jMIb)VNfXN)=Kn3k57em>622j#4+tRekA^PHv$DqPJT<0;5* zXj{8NUx@Sv@t~aA+g-byUR{T$Vc@5CVNy+`97@$rPRW#9$kA(BnVPCmSov&x4Q>3c zEh~5-*jUe}SHmd+l$^ef@KV@>4cIZFCd=Ru(TDaK-m58}R}_+W6Pb;sWrGe88>K=N z(L0DhcpoxI_Owy=OS0&#H=YyN!M^76fKfjKGVdoZhQCHE!b~oCHG|XeUfZfTNl`4i zf|VGP+)Zaxf~>c3XCA7*zG3cF$A`tAD*fj+fo$5L*@`_~Q@d4w<|A*0eD3=pAb$r+ z3Fzdw3VOAQ-LWiqczAzD77bZ!Ha6~fp@NuhX40JXfLh}#y;m}*7K`WZ&5n~q^y=7ngoghy1C3&A$^eked_~E}MG_V)Dj^j5degS&mZ^y7LW2!VHJF!;XBIxi4e3qVE?>4g9$Sl8?{2vJ+J*X;y9 z2}FwLPf!gf(ewGV?SryvvcB&VKMoGgXE}^Xx)&i9c6Q_79_Del!2|6UX4dIBR#s>~ z7b5n!?!0(vL|F=vqFgf*OWWNwb92M~ZCa(`@pqv^+j0s+dWvmOj?%^DdnE0NuGm-fYZ=Uqg4F|5sNED;~2q-_C=wRKa=B_+UOMX7=m$+^Sz|%v_ z&5P94)Y>dXMI{(?T8#A}m6nF7y6?I4t2i=-)=EoI3D_8TZPX6SriUCSDaq+hqC=`{ z9F3n@{g@7HMhfFF-^Zh-J)JAp_5{i{0+@U1X-M({7VqogK2g5DyFV~rha=(R`*pPP z-O2a%%tT8|xajbR-~1~Aqu;0Fqa&Z45j?CtMhq--&9PvlI#Wq>TSYqG!Re?{a;59x z0B)^p8J>!`Fq>)aq8AhCinI3C>L72vga{gW@KTKr23J|m$)g6tqUz}R4z8{$(2Z{a zfTdNi*_oZJIOEL8@g$pk{$##;Rb(7XX zMp_NrpNr{Y<>MMJrphQ7)cxt=c#sAh9GJ7ba^loCG1=bRGgn;S9b4u14^C+SVu0C- z&@dh+`qv;V-g@rijb{&8)+8T48S7ru%2r$%^491Ni=>!ERpBoG 
zGmegwWHX}GNom%rdo!ZZ=r~a$r@U)|prS(UB(8B^7BMp;uIBQAftHf8wYvHgglruz zk=L($9UUFT#Kgdv5}u^7S$6qn@w!j$H(2B0?H?SJkc__(cWZ8Lc4#Kl(_00JlZ<-8 z+r>wM{h7g5A&`?_1PT5WO``FnEd6bQuTV-VO%7E~P%ppWSz#)vG~gmCHGjF%2_)$s z9-_V1CH4_@5D=Z_z-8S(eM*IT0EHnj5tbD{zs%~_7p~f_yl&jh&CO^+u1R-S=5@2R zN{;M>|V6HyR=0Lz_o<#;`t4I+x{a#ND08yQlQ-p_sbELF%;38ss`b&^|zG z4~Z$m>Sm+wJRo3SS{kZ}gp^bW71c$Sf`yg!VlrQ5Qq=^GGZ)@4#DaA#d2;AYTnHA` zcui&|w)y5h&o(R33up>*jTPI_Axpo<*z8k*)yGJU8TFM4BW0QnB>kDy$I~}T63$yW zQO=L7RO31?|KqmWIMy^s62Jf%79tm$^8|?D3sw|?!e^J4*5>9-Zl@br*Ho?$%(rjN zF?ajI%}!74!@PuU>R^?3%wDL>SRn7*+@ubP{VX&o3rS7KypX`}6dWbfRQZ=-HLzLEUcoc7`q4 zZ-^l1PpCmcw~rrJf;R~l)Xf>S9(TvYm0iK>|K;i%jL15y1opMt_W86amQWjFhK26F zbgQk zI2l0DE<{^7WffesrQ~+PyukyxlX$aH(6U7E2WboD1h0O+O2gM!)P8w!@gl0G$}x{+ zPb;Y49C0pCUVh_6LYcKygYD{%>FKA!O@JVD!a8J@L9b)$fICLMuzH~^FHh9ln@{8W z=;+(Kw*1E1=`K=$!U=G5<6{YcsL9M+1B5i+y|V-1^J{q1DnW63n`NR3>^^oD^k381 z^PN&L(6m3nnkWfaS^bkmcwfZMorq>WRJHSmDzzsXIlcPV5>vz~Vi3yqff261ac@uK z>YrZoZw*nNhrnC@-&R5#u-)c%N=8AE)(u%$SYTy)YLvmJ?wsEO#Y#xwQ>|@*=Yh%t z=l`SAx3Qq2pnj^W4m$jd?G$xZWdW~<^=^U zxJ)Q0D0a0l+oF1GWq~q?op+_`l|6-d{+)7OtuvF84mT$TIUQTvg10~+FgpXNX);7f zCkugi`F5K}S|kUG{@2J%KnWtbJE!-J%8a5x4PKS(r0Xy9oD5N(Hki7USKmgeh~Vk( zFOUBOZTnuB_`G2^%~|ZEl0M0utV*ry7hHnJ*hXw&IC(R(TnIhnD9GivU%2_j$L;J?U>qDM7aLQA=T{V%7L}SR6UQI*%(*XJt;6#b-9I z>g>#_sp(o?J{*kgzkba$x%5f!_wU~z9e7&)^DU9~3YM0t~%SL^7VKzw>`cD83# z$PRrCGim)dxA{0E>v3u~SZ@%SK%n=goG7u*Xm8eN4eB!kfx@a-MC*+NF?YhBuPiq^ z^;87?s=mJ;UY_KdDn2J2_f{zvO{YA3q)dG&Q1e@&&i0%#s$xF%Pk5Hw9ntj<_y3Ni zf}k$QFD{nKRZ@PB)9ssm9YI10#bc^CXgKDX79}nQ* zJV37tr(J$~MDtX8z_Sb!Kk8($zV!8LkyKgkNbywWw-tH<5;)=LSXjVki}h6HH^x3k z@T({)BH>Q=1sNYm6#WICI^SnoLw4CA9~DX2*&D#C7g3k|+O8DgOWs(^p zi3NTvEZ}*0JltI>z9u3 z=~Kw#6PYK}{=($XIj)lb$J{9=G8s~*(h1Su9+-2kyr1O%JK=+es$j92e@Bye0CGo= z%L2<(F2+I)PK=F3{X=hQV{>FIymvHpRdu;uM{V!*)?_v21$E!H>D6}??hM%^v+*au zB1k-FZPYN><#2ASTTmf*!05aQ87E#oKStw&?pog0fCS;CM;k&04mwu1KabvW0}3CS z?AA{=CHS`M0fbM*$T&4Om#Zjwch>@{N%nW+X3TgC3z~eMJQKBOQ0=CpX&?aw_MgS& z7=&hiqW^9Oh?b+xuv;%;VPY!l=?OUQ$fB(t9$IO<0+h=XCL{eN@Djwt#d}sMt8Rg> z1W^OoSui2HJ0Ktc_$;7g{0MRzAg~3XW}r+2wVwITKp4nSBRu7v*|C5#2qxt(_gr`0 z_(45bfP$VKMb259C%vM`(GzuMrgT5;kyu&8cd|dqW|5zFF6H>~k~kLe&o57RYqQ*9 zo)CwMVHm@`bG^sAM*fepqC6^&S$`g+o`1Rd{RcDh_3hDWCjxK$V{5&X;ySoHc`$9a zzP@Ba**QyFQJ%--{}ougNY=tZD5JxXk&?0iu$)X1-MSs;S`;)K=kZGGHHh{kQp7iq z*AtdVApUsu23__C!HMs%JD$x3e5E)|P_&dJU%rXCp@vH>em0Ir!xo1EoxrPd=L;qaNm z=EF;bR|DHM)L3Vgok2kXFx1p90BjGY79TG5;eX|*Kli%=^ety+*xd5cQh!Tf1SjCu zoSaxcyxmX-ToO2e&w_vswXnDd`mdjHDzzIcz}YQG?0qu<5+l%=n;svFo#S9`wY64Ks^Ip_} z?g;lopZIg5Da2A#m76KuXa4OfDlj7sBNoM3DjVXxEK6Qi0)PxE!V;=5BmRA=cA$p9uR_@VB0{A53 zy1KgY6mn33?I3~!?U5IL{66A41{Ov6vdcSW0mv9soReqx-+Wj(L|W-3j|(KZc_$?=3Gdb)H#DN`8&H@XS5$tFQKqLiyGS#&=^+FGjvT485^!BGIK)7)PineO z#lQd;3_?BP%a^XMt}hV~K1~H60uB{G@t}86(QCm{KR}53^cEKfM;ZqI;FF-pCSZI$ zPrbUnhVMm1Mh+-&Ia+>^(9jS9irGET)6Dd~y4d^#^`KM>91U~g{rZ(` zjCS1yu|WKz6J2l_EJ%0ua|F}z%>A$p-s!yX(&qm2o>X!dO{al`ufJa$Qs@ugN_&RA zK!}qloHIq0`C7!c^tOEe<`k1sLD$@(84!ZPamPe!C!D`rZaDkxNp;Wp&61AkwQ(+w zzruP2$;)9LH~M;iUynY8TVk=VRE$Nd>_4;#4OBn{Tn}!R6Y|(#segt|&f*j#GHQRQ z=Wa9=nK+UQ^yI+$wW87FEizHpbo%j|7hL@I9=q3K&+;I(<7MP%B8=B+YzKBC_CVBY zF6T29nc;kZ7H{^Qo8{;<5~Q5Ed<~}bX>SSvb||V4ruJSfM{y7!S(=)u4|iNlsS#lR zLMESQ1ALb9Aqx3Rbc~ggubbX>%FkV4=PLEU@EZdS&1Ve71JDb`IG^&v1nA^zCwD+x z1A!W(MW^K@C7&6g?Q3CveNq+ij&aa{Ee9g;*jQL)8r4v3coi%vpVGi!BQ-sJpo?0i z?#lf94Hv)A4F>F;(awCnM|OkwWW+UL^Qjut8;S?u?a#!I#>jwc9?L?!h``G!6h_Z$ z?c$KQ8xA1R@wXe0h}Ht&;E*8`s1Fmx>yLM>tPEh0KA_T)lN;|=@X5)ojTe5; zzibq!p3DAZ-g2pAxxMoZ{f&50Q4#XVD<(?MzJ$Q$Ae<|bpM>lz<`HuvO(9&$WK+Ac z6;wmZ&Dn0(1^KNU41r^O>0z$6Fn$b+wbkZh1vxtDVZ-XCJo70`d#nt43r4bo7t=X0 
zdoa*%F#Y26=k3N4-wqSt|;T?{<-|ugS7P+-l@Kc2;G_|$w z?(S%csVouLLTq&Qtxb@twyvO1I*?H5Z?oVO5qxCb-Z8D7%bYHVz{czYj2~_4ygpQK zaB^{xX1@3-`3UiR zDq7m$fFMu~))p7*}Cr;ms)GrATQJZkAI9KP@N=Lf%Z84pdOfuGBF*Zl1k0>D#wN#I@GQ)UD_C zl+awSh}55n$q7@{yi)5&=JuD(r?D`gs8n|S%~Rp5H@|viR5>uBK0=?AJH_C2l=hP!DW3t!x20x4wUXw)T?=cvbte zgfD?pWKiNo#Z+1gJTIJ4%#nji3ciOxiY$u7HVemo9`9u5X&K=vG$(&FhXdl`EtwX zAlwU?ywm(p@jHa6c$fq5oLL*4AxwB5#42**MJ~Rc4kuwBFV~-+toO>v$pQauVPn$) z(taSZz^K>k@OXcFcPCP+RvwatAuv6qyxJuwqi(x5XdjQeMgKtw4@Y2Wa5Cxn)d;Rz z^l#@6DqjS}`UE9vzc^|Nyx~oEKW}MjA%$G>qsb%PD>8- zh$@Ka^o=jm@N;N^R+K3fDl|Ii^p`cPt>tJkD#|D>P{Kk| z>m}!=!-NYhKbGL)?yRp(qmYx(U{!B2ral2hC>y55GYm ztE~$y%$Ags!;Y+k#Bh4~{T_rQ^6I$^KdQ zakQ}sp@~|Snz`d|D^)zEY9+<_C>c__)TQ&Bt-%Tz=?QAAq!!r%#{CU~+k0jf8~QX;>N>j;vRu5=K?md)qk)-!Jz) zz3Qdk02F_2L_H$Xt%%(4V8v$}`||w}e6Y0isJy zSPke<6qTfL;-{tE1@$*XLGwz$1@uu+n>wj_v>ZEK#*T zP=c}mn?XY>w%x#1$l2TAa{V5~vw(5L<<(yQhC?b{QbqMbOsrqLso97|o7ZGnojrQ> zk#0wlOO)0f+<|a51z~zeMQC+)adB*Qb#`VZ10h%PfRQ677#%f5{jTeWu8d>c|CVw8 zn4j>7a8RY@)7V0tMQ^V}v|?5#2B?O`dnjNe#Konvj0?-O8&N9+MMVQH!d%z*d$J5R z=O)xl-u*3$K9kgpfV{ClF1=qFD?m1)*3~7ONf z3s$1ahak5;`oZrsOPVUG>MG8!csYH~v%j2WP}6dZebX|~H_Xm`JM#S-m!fuhayr2~ zo8=i=bx>2HW3y#emZ0;{vbB1t|JZ5ye9$WzR`L>8?J#%~Z=JX#yIjWTox3y*T#^f9 zLTL(jM3Poonm!^m5ue>XXtgrFtskZp<$Qma`|J0GnS!kFjB|*OP3F;$TnTuwpn=gZ zNpT!3mSP&qvN8kDtzJhDZ6hYEAAQnORb9BdVR`isae)A`^EVAq?4abDq`b8O)_eu_ z-8jiWl9Ll2M>zg%hdZDU2FOy_w~w{8Nn_tX92~6ot(^$w&Nw3zec*c_+bEzL(XMk+ z1`YR-`c8U=l@7Pd*}l?7dg_(TW>e2^-wyCJOUdHjO9aT)xe^punwA%p31?4b!=g=% z&wqu5wM3%1##IeJY_2-6>zL0bbysy>*k)sYfG;j80gKfL1?+)+_sk7kpKT8VHuA|C zeno-NJ4#Ej?j3(vZF~gJ@0B)G^OG7)`PoGWXy)D*5n}R3y@H;_PAxft5KCiIj`3bv z5*n|?&tES#wwB+I-+jF>QO4aF7#;KL(j+&*8XMJVuhO>`{wSpAB>X4)hcKTAcl^*c zt$PF^hkjLIOUq(VB!L!jeOOk6XouI639_=zfG45yjyooK_YUoQ#_+a_3?%!(SNVp> zS{3g9@`J+wC8Z-F5tLFj(p7Tz+3pSu{MjEFJ;lNNJh*^K6Es*Ws`H!u!*g4&1sMgd zV}(S-gOOk3Z5?dS*;X~G3UIuU;mOYI$f;qD9D+>lNtYZ%RUDBry{N5`t{Ha4R-40B zox@cNh0~Yhjzba*EF}nMpNqP6#!!H;eqTM-|J7Zt-GEBWf7lb~2p|D=CbotH0V?lp z(`svuwna7LNwVM|SaWvUsg7INpJat24zCZ3OGsgWH1WKbDvBJmGA03K%)VdmfLjF0 zIZD)P%-0<3WIj$B5pzu;LT~PW>F`CC6HT)^r&zInfpmlh`WoNoY+M@k8cnkXSk6|U z%l)BSI#v+!K*ZXMMt6t31&Ekb))r4lc=+(QZzW)gAW5({<9gNiaO&T;7H|r@y}ec0 z>Tnn;2TN_}mK`{2t#<1IxpMvA&=G!tDftM-zJ#GU3jDH}n?JBe4{SK#S>Z$q6)G01 z;75*bgVFT)3*tQjG=5SGAqO9e??EeM*Vwejmd-fNyqyiZK|Liw8zn8b?)h*y>{^Rj zkB^U}LfsVoL70>g#LP}1f?{k=XAxgeJP{iR8L%Rk-SQ^_H)6*(a6-_DoZShoM`-EN zIp6eoeh8`ek1uKAj_(V|&0%^)#}ExhZOSMDl>Li}EP<=k{2)d{B%0N~O^u(f?eROX ze{4*%=BhDg)ES`5)2yuONo8IqC*8xzfKmhdbR$;*zz9I7JcSx%cH1Az^nO8Coew`< z|7}xs4Gn;zrlKRkSf58jOzte>-96j?c@#dmY=NED(&S=9>=8YlSN!hU@x02#iM_U( zrMiZ#*8bDBkAF~80 zg)8(%-&Zba)h!+_SKm@Jsh@9d@`^_~IuThJniLx8=M@y^msM2c6;#knPfoJ1Hq_bU zbGqHkL#l>YFebw$Wg2$W31QNM1NukRX2vyI>h>P}1?*5;D`9~B#M)d6VmhgVOLqL{ zl+jLkVJup9lyN_K>4^`4CYl6fXZ*s4pUFXQMzAe)*@u_D46Ro!>(p;gtk*PQqo!pa zqo>Qt&|x+jI?IdLmXc8$`KHUp=)2OhjU7H`HMUe$Xnvl(K@+;;~?bsn1NFbDJz~#ExvA{(p_cBxtF;p(%OD3<0fz9GKn$tZqr3dw3p@>zIyxU6# zU9J{$CwRBcB4OR_?2#IsV%pNU^SQV5y6o0UHXqJ5=9?{;949F#88chnQBvn}a{0<> zNjZP-Z@n(o7skDI@4PDfH z#9MLNq{XY!kWoDuE~WL$=s?I}BD@3p;pj>gWv8L9!A!Rcj!%l)o#L{vNC#(*Np)1G z&9BkHm_KocCEWI?*eXO=yJ&QnT7T{$eoP08zGZkngJx=Gw$kqVGT6@REyxNZ=D8MK zEODZeS8(^(EN9zx=X1^YpwJ5l;_{3<^qXOG_CMr1Y{H9g+l+@mp>D zB~8=aSaaHm(ff}Hu0=`-Iru(w8$AA|Cu?oO!di<{gPTxQqc_3 zZJz6+96{fIKKh=Usjl^0&{ZAM+am&+OFT#T=bHZ{^qM=f<>@;+1qJM=3qM_V!|yU% zNyh_zOM7H26-x5vh%9G$bL#Or_m`H3R1;Q;JGJf0_|&R-Fp6@xnHIyACkb%YrUii!A{ckH$RW+WHEwMqGEyYb5R2sWNc zHCyev9FOjrZ(BK$j%UNB+Z7qHI%x^@+)R$DJsXKJ`zOtL>Sl>seGZI^ENBY1C9ulzO=WHsNg|Jd$XyZ-&=CdpI4-+2Ds?mH`aA^Rakro}66_f) 
ztgb9i%uM@ys+RM8V@W9@BLOV~4~^vM?Z|^bm$G=pQR~QFORRsiX1Kofai4XtrL(ns zSTR_rHyb^9-Dt(zpZ9^xtH9AY98Ef_IG8jDRvhGAXi`eq@M&L&i)C zgcRwchK?GQ{9JUX$-Ue5ZeA5RaXErwkemtts#la3=G&H;eKaxrWxmfj?gI%PuA0a+ z@+HFLV&i2p`M6kfJ50kORNPnj-1l;C{U;@n7~iaFj{QvKj3#f3hs*;2hdv6IZ*4Vm z*sj|2S{!zuf~4jB!S|#1_@7Gz8RO%oSsmVxhbW#tucN>2v)OJPaCed(y0T#jTyZf( zVmOFLXjbd>HBG#vl_c+ZDpWlTu`qI7rn{ zvDX2BWYG<2g^s_Ju2~>i;O!99RFzYf7#PPYTQu#8dEVQP{1S*x3b1)=ew1KoSw~?40$E+SGr?*GMnw>R3?Tt(KO?Vc zgObucY$b2|UXUS>iu2D%mLcfUTa*Y$S!$ zyMb8H@O@5D5_jFQz3Xvo{Y)u0jv>FnKex#Q5QGW<1Gb>t?3h_}by7)e=@V#d!!nEM zwn%7ph!D%aD z2yU z-iMB&d;}Rg7SPYdlr$79uwd1?W=k72wkL<8KZv1tTtQ_}P?A#y((4_oimOaYFJ8z< z*L-uuQ6rZAP$Mg+?Rs!gJZGyb@%LKJYbv+bD6H}mAB}Hsn>=+3>OP7M=(Oq4xRhU? zoTP9uR&Uab6e1$BekNf?Lbw2gmsD8_DIwz#GGEmgOh(s3og>IFQRxL0WCASjl`m|Z zTdg3)gE^a>Adx*iv0ztMl9F5M8u96hVIqx>tKEo8F0%YM(5KMhvx$#*p$-MQLW5}~ ziN=VxBK;2JZ{NlZBJ!D!#fYUM@$&MuwY67aYPtN4v9%v(XwddtxaqXqR$A|8?eTMZ zDx&>x6lzXAVl^=8A4*Je=#?rtq%vi*V}`J(Vtm`B>pml+j_QYsvv2yIE_#4R54!LXOir&^WHu3<{>&1{O;&fwu zt?Hc0&*E5uI2f47^OKX4%S+8VW1oTg6a_%wgAp=L1@juAuq}-wLB3RIV?<&3DR#Sa zHVllKYUqfXUM3h4f*9tpiFMnlyr)~Q&&ZimDGq{EH7K z>Qv*UP;w;u)i}3xgCTgV6zRfdAq8ESK$;_tDkusX2>v17MR_|BVwHIh_Cs%_5T2^# zkWNHg;|))!h7=2!vuYU|Zvp%)zY_f{@H5mysN>#sd$v-y6CBFpO?Os1n7agVC@Bf2 z2?3C16*2Jex{aAEobW%O6|GjsW+Kiz4hWA>B0t$mjjW3cyMK4g*~)8CUY-jA&w^S` zZn%QGf}}+X)0Q0^4~iYa*IA-u*V*L6ZgXv`#hRR;q| zG?wh+U%vzc2Cz?nq9z3&Uxs|4jHT_^>BmRZYQ9=GFA*e56h0$wTF>+673}_S+9-A` z&UU8M#TTk zll-6OiFC&VBu1pa-yLuXDiJSAW4|MkGe;U&Sr^*q=U7^l+87s@8duovTJ5n8PFR)| zhrSd_ktdM@6)GeSbLT5 zSmq^U`#es@WKY$UY(uO=Ga+SWHVPhlI*pC za0eXV@7jV9G0VRpk(!!%e@2s2GAHyv5-K|?H8;E$X9)mkdWkW=2hQeMh8!nqR12VmmmCsjd{^d<-KY4`(cRgu2SgV}Pvv zZ(iC6ko6TL&aZFWMT^#_AXwniL?PTbA}%r2-#Pdtv1u170lh7mm$iWJeHZu*CCW$( znId$@r$EshnO<22n@Hr1Hl-MfHIjtc;!pMVn4v9Q4D%#6gniK`$? zh3P{)BX1Cto9n@0)q>=kvFW4v_u&Xg&bLCz<-c!lYg2@DzW(<)E;Kt*O61kH2NfZu zmD!k=80eew&4<%YH@KYVF>8`fAWwJ$H9+q=tKUuH@k9fI)i61@m|?p7k_C=8aO_cO z6gW`U-rVoEy0Jwu7Z>J+r&Ti9E_nK{qYbiJ212a`mwis*`HaZKVGQWInS!}~r7 zg=xXzd_Q8v+1*5_19Pj07jw}6GQy|e7#SJq#jmI3>4_=~v7pO*=py~*LD4^pnkJ3| zH`^erBf@4)ud|ztd`)#rddlnhk~xDktwf%6!+&wivo5su_eqQ-bnEkn;Aq1tcTvl= zrgoa~lU2q4*Y(mAgaGPOaaq~@>f};wNG4gr8)au%dK4?7dbx7svfruKj%<#v^ndbC zQ@GJs->~+d?BnE;Vb27fVYNU7(oC~d;csPITiE_&AF=8}Lo(yQ_kYW7Il<~Oxwz!0 zSd0Ze5b@t80R~zMLBBoCthw>oX=1W6iLxf4HKwD8B|K)ZP>>6>?A^--B6hW54lSQO zEbLVahHQI^DCjy_I%hfh3;ME|NZHHs!^Aq|(To*$d@XWj%?n{1jL>dW;uT^d zm+WMcNs}Jn(>S?*YG@Si;}K|aeZ)(R-$R~g)!5qhbiNPLfDCcM-IYSY;-6Ch>L$+L z*a(0pFs6E4AQ-Ndgp1M@0}m7x$5Xxm-6I`}XvP7h_*x=bGnV(6h1mw?HrodVhihc4 z^(rjPbL@@mRW8$DBZQ;dMj`z6Q2Wxu2pNnApfz$vuKYhNgBeUtO-xPw*`o_H zJNuECAI@k`Hi-mTxUt4P9i(kfPk#uDi#e5cExx$YBTa7xx7_FQ4Iy@-^aIrucro1X z-mN_`yq`QntP+?ydqFZjDFz-L;S5y}0@`$*3J`2bhqYdkafck8LG*YBG_`UPd=fcc zz4?lkra!59BnNH<6Xxv#4x89jZ_u6=E`sSU8ND}KNWjCm16Kfk*QB$}YKOI{>1;X_ zy}d~+Pp6vcMAx4BcBM%gd5|oba^Jy-{yY8xK4>gN_nv?kU|(zEe}dT=+>h%@8U``3>%*P%zGZ+{ zHt4O{3IenXMyN$nTYCviz0lFqd#XZOp$NMQ$9FhFVAHq(EiRqIR$7nZUr#KC;MdU5 zAnq5`K4lKJ;jlK5OeVj0@}A;9syr?~?vsJ1f^jDx8GlL44`@C)V5FM_gwLJglfS-Qa3HqO-`sZWd zp2Gh>KRt=^pN|HINbvvd7dwr;15+uWc`TXy$N?zNSEr}$ryGG6VGJjL$OZO4b@u@G zhfA+T-hSupXB6}%nc-Ig5GYDbPcO{Nla$tP2Gl~GM2^GNPG3MPn9S#s1_K?fS5bc- z3_Q&>GN5_VGB6}DJlF#IO0;dqEu4ymhZwG;v~*ZV$frOcN#F+!FJ0-Qx0bR#m@l(B z3gpTSnIPm3x4pF`nN0T230&ZV=|O){ z2%xZZfJX*HUc~%fuaZLRtGqV*QG_9&Z+~`v&SAYcJv%F220|4*HT82Ae$T72JRyys zk0eh4rP*;e3S=Q*Ktxb)Ae?bAa6T$bWmq4XNnC@ws z6E(M`CG92_Z4baD=V0sVGGckwl%JB&Z4Ay7MwQ1GLKHZIDRhJYOzmP|Z zYKB0bm2}U1>eX#^p`S>O{%a^ll$4$aLgql}03<2!_`RAM8m@-oDM9`KsOz8& z!HswUh7z%`u*k^BC=@c0>Cr@(1!+@dcfgXr-rnBq7_?=uNmgD)#uB`AaEEMcY#8;B zE`WTV&)t3@=v0Wj(4Y<&i=kWrdJU_{Hu$m 
z3BfM(v%mk!%F3-*M`EI?l~ozoWZ>jUgv7bNxB#?$ z-LL@>py0bZUv0EDN)jSk517Ty&dwUoh8P$bDG@1UQ^uE zvKAgTwip&149q9(UZ_KFpukpja;UF06|MPg= zDT9pxrK6|ENJ%-jxOn&8%Vpe0P7VcuHGPyqyu2%V?cx_l00wIC`Fq|Tf}`gQADVz2 z@N)_o(wj5cTiJPoNd^6N@KSsqe(Hs0S~O*Xxdkxz_>Ygd-Fv!N5D&1_wpT&m0Y*(a zp-mO?=i%5_hP}F`2CCD^-u`(XE3T2g{s}=+){@iUYRHcK@Rj}}ELue*zVnQB&lehf=sk1zb- zSSa9#p@9!bXSc@Zb~0B{ndV{w#A%6<#ev#v)#$!Nv%|N&SR0P0D&Mqe4umMVZ zk}z5|*I?0-o4b2A)cg~ZIYbx}GahKqT)jjGP}AqcD_{yg+C4>I2!W<=M+zWtfvA@x@Q6-hf)Rc;mX^?=Uzkl06F%Vq4}*>a&@Q-1YP_E3x;O86 zk2aF^d;rmqUxOlRj50GH+8d@c2sTFFa0!I1xS=g@u|Ate;j!#&FTvouh(>g{UF^j+ zH*=AeK@#<|AZJxba zLfxS@7XP_g#-MSTX|OdI3pnC|f#6I(qxmzx=5+%@Maso0NJvP;#981{*@=Q_LoguY z&kSZe4Cxp-Ihtxde}w80=1N1 zt|-iiP>4I_g@y2$))p4G@5>TNAa>6j92~y(x-j-Z{o2@o3bcC$7>SBd1IQBcWp6z~ z);aY!?r{J)7S@bXmC8ND4g?-#CORA>n^z$i^SO#t0ZY%o``gg$uc={fqKF$6x_CRF zsICs5{xc+%$tWl`c0%U#RUAMMFtFF4Fd`2Z>%*I3Pp+?%FY5mWYcG&qcFGDukPs2` z5QQLrZZ=;Q3%>yVjHKiEG!xg%%xp+G;`q6S!*8jJzp>jG(5D#Vj}V`T@j#mg2xR4{ z-{S$btlwk4kB%p6Li^Y{`rxlX_%1{C&hMY7N#+j{gw(c66DklCl6iZmzyKpG(_T(1 zon{RTFX#YT0=Lf1!}A+N=1JQBe9o)?pA&wo@V5OHiW literal 0 HcmV?d00001 diff --git a/hack/quick-build.sh b/hack/quick-build.sh index 23a5c87..b3ccefe 100755 --- a/hack/quick-build.sh +++ b/hack/quick-build.sh @@ -33,4 +33,4 @@ helm install \ --set controller.pullPolicy=Never \ --set controller.image=${REGISTRY}/fluence-controller:latest \ --set scheduler.sidecarimage=${REGISTRY}/fluence-sidecar:latest \ - fluence as-a-second-scheduler/ + fluence as-a-second-scheduler/ \ No newline at end of file diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go index ee267bd..5061ac1 100644 --- a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go +++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go @@ -70,6 +70,8 @@ func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c log.Info("reconciling flux-framework/fluence-controller for request") pg := &schedv1alpha1.PodGroup{} + // Get the timestamp as soon as reconcile happens as a fallback below + timestamp := metav1.NewMicroTime(time.Now()) if err := r.Get(ctx, req.NamespacedName, pg); err != nil { // Case 1: if we get here and it's not found, assume not created @@ -110,14 +112,20 @@ func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c return ctrl.Result{}, err } + // If the scheduler time created is Zero (not set) we set it here + if pg.Status.ScheduleStartTime.IsZero() { + return r.setTimeCreated(ctx, pg, podList.Items, timestamp) + } + // Inspect the size, set on the group if not done yet size := len(podList.Items) log.Info("PodGroup", "Name", pg.Name, "Size", size) // When first created, size should be unset (MinMember) + // Get size label from the first pod if int(pg.Spec.MinMember) == 0 { log.Info("PodGroup", "Status", fmt.Sprintf("Pod group %s updating size to %d", pg.Name, size)) - return r.updatePodGroupSize(ctx, pg, int32(size)) + return r.updatePodGroupSize(ctx, pg, int32(size), podList.Items) } else if int(pg.Spec.MinMember) != size { // TODO: Not clear what to do here. 
 		// Arguably, we also want to check the label size
@@ -128,6 +136,39 @@ func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
 	return r.updateStatus(ctx, pg, podList.Items)
 }
 
+func (r *PodGroupReconciler) setTimeCreated(
+	ctx context.Context,
+	pg *schedv1alpha1.PodGroup,
+	pods []v1.Pod,
+	timestamp metav1.MicroTime,
+) (ctrl.Result, error) {
+
+	// First priority goes to the label, if set
+	if len(pods) > 0 {
+
+		strTime, ok := pods[0].Labels[fluenceLabels.PodGroupTimeCreated]
+		if ok {
+			mt := metav1.MicroTime{}
+			b := []byte(strTime)
+			err := mt.UnmarshalJSON(b)
+			if err == nil {
+				timestamp = mt
+			}
+		}
+	}
+
+	// Now patch to update it
+	patch := client.MergeFrom(pg.DeepCopy())
+	pg.Status.ScheduleStartTime = timestamp
+
+	// Apply the patch to update the schedule start time
+	r.Status().Update(ctx, pg)
+	err := r.Patch(ctx, pg, patch)
+	return ctrl.Result{Requeue: true}, err
+
+}
+
 func (r *PodGroupReconciler) updateStatus(
 	ctx context.Context,
 	pg *schedv1alpha1.PodGroup,
@@ -206,6 +247,8 @@ func (r *PodGroupReconciler) updateStatus(
 
 // newPodGroup creates a new podGroup object, capturing the creation time
 // This should be followed by a request to reconcile it
+// I'm not sure this actually takes, because the metadata (spec)
+// does not stick
 func (r *PodGroupReconciler) newPodGroup(
 	ctx context.Context,
 	name, namespace string,
@@ -217,8 +260,7 @@ func (r *PodGroupReconciler) newPodGroup(
 			Name:      name,
 			Namespace: namespace,
 		},
-		// Note that we don't know the size yet
-		// The most important thing here is the MicroTime!
+		// Note that these don't really stick
 		Spec: schedv1alpha1.PodGroupSpec{
 			MinMember: groupSize,
 		},
@@ -226,15 +268,12 @@ func (r *PodGroupReconciler) newPodGroup(
 			ScheduleStartTime: metav1.NewMicroTime(time.Now()),
 		},
 	}
-	// TODO need to set a controller reference?
-	// ctrl.SetControllerReference(cluster, job, r.Scheme)
+
 	err := r.Create(ctx, pg)
 	if err != nil {
 		r.log.Error(err, "Failed to create new PodGroup", "Namespace:", pg.Namespace, "Name:", pg.Name)
-		return pg, err
 	}
-	// Successful - return and requeue
-	return pg, nil
+	return pg, err
 
 }
 
@@ -257,8 +296,19 @@ func (r *PodGroupReconciler) updatePodGroupSize(
 	ctx context.Context,
 	old *schedv1alpha1.PodGroup,
 	size int32,
+	pods []v1.Pod,
 ) (ctrl.Result, error) {
 
+	// First priority goes to the label, if set
+	if len(pods) > 0 {
+		rawSize := pods[0].Labels[fluenceLabels.PodGroupSizeLabel]
+		groupSize, err := strconv.ParseInt(rawSize, 10, 32)
+		if err == nil {
+			size = int32(groupSize)
+		}
+	}
+
+	// Now patch to update it
 	patch := client.MergeFrom(old.DeepCopy())
 	old.Spec.MinMember = size
 
@@ -385,11 +435,10 @@ func (r *PodGroupReconciler) ensurePodGroup(ctx context.Context, obj client.Obje
 	if apierrs.IsNotFound(err) {
 		r.log.Info("Pod: ", "Status", pod.Status.Phase, "Name", pod.Name, "Group", groupName, "Namespace", pod.Namespace, "Action", "Creating PodGroup")
 
-		//owner := r.getOwnerMetadata(pod)
-
-		// TODO should an owner be set here? Setting to a specific pod seems risky/wrong in case deleted.
-		err, _ := r.newPodGroup(ctx, groupName, pod.Namespace, int32(groupSize))
-		if err != nil {
+		// Note that most of this does not stick - we have to get metadata later from pods
+		// Or just use a heuristic (e.g., take the first pod or use reconciler first hit time)
+		_, err := r.newPodGroup(ctx, groupName, pod.Namespace, int32(groupSize))
+		if err == nil {
 			return []ctrl.Request{{NamespacedName: namespacedName}}
 		}
 		r.log.Info("Pod: ", "Status", pod.Status.Phase, "Name", pod.Name, "Group", groupName, "Namespace", pod.Namespace, "Action", "Issue Creating PodGroup")
diff --git a/sig-scheduler-plugins/pkg/fluence/labels/labels.go b/sig-scheduler-plugins/pkg/fluence/labels/labels.go
index e0040ea..e377d97 100644
--- a/sig-scheduler-plugins/pkg/fluence/labels/labels.go
+++ b/sig-scheduler-plugins/pkg/fluence/labels/labels.go
@@ -1,5 +1,11 @@
 package labels
 
+import (
+	"time"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
 // Labels to be shared between different components
 const (
 
@@ -9,4 +15,22 @@
 	// TODO add more labels here, to be discovered used later
 	//PodGroupNameLabel = "fluence.pod-group"
 	PodGroupSizeLabel = "fluence.group-size"
+
+	// Internal use
+	PodGroupTimeCreated = "flunce.created-at"
 )
+
+// GetTimeCreated returns the timestamp when we saw the object
+func GetTimeCreated() string {
+
+	// Set the time created for a label
+	createdAt := metav1.NewMicroTime(time.Now())
+
+	// If we get an error here, the reconciler will set the time
+	var timestamp string
+	timeCreated, err := createdAt.MarshalJSON()
+	if err == nil {
+		timestamp = string(timeCreated)
+	}
+	return timestamp
+}
From 956123a289416e7ece58699c91437d4731afbdb7 Mon Sep 17 00:00:00 2001
From: vsoch
Date: Mon, 19 Feb 2024 08:36:52 -0700
Subject: [PATCH 19/28] docs: update to design description

Problem: the design description did not correspond with the numbers
Solution: fix them up, and also fix some bugs in the controller and
fluence that assume we have pods / a pod group (we do not always)

Signed-off-by: vsoch
---
 docs/README.md                               | 13 +++---
 .../pkg/controllers/podgroup_controller.go   | 17 ++++++--
 sig-scheduler-plugins/pkg/fluence/fluence.go | 41 ++++++++-----------
 .../pkg/fluence/utils/utils.go               |  8 +---
 4 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/docs/README.md b/docs/README.md
index c4718d6..5884850 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -16,11 +16,14 @@ Both the controller and scheduler logic are bootstrapped from the same underlyin
 2. The mutating webhook provided by the fluence-controller intercepts the job and adds labels
 3. The controller for PodGroup (an abstraction that holds a name, size, and time created to describe one or more pods) is watching for pod events
 4. When a pod is created (it shows up as Pending or other in the cluster, and doesn't have to be scheduled yet) it starts to reconcile
-5. The reconcile ensures that the PodGroup is created and updated with the correct metadata and statuses (and cleaned up when the time comes)
-6. As soon as the Pod is pending and the group exists, it starts going through the scheduling queue and hits the fluence-scheduler endpoints
-7. The fluence-scheduler uses the PodGroup name to associate each individual pod with a group and start time, allowing to sort them together
-8. They are sorted together, down to the MicroSecond, and Created to run on the cluster
-9. When the top level abstraction cleans up and the PodGroup size is equal to the number of pods finished or failed, the PodGroup cleans up
+   - The reconcile ensures that the PodGroup is created and updated with the correct metadata and statuses (and cleaned up when the time comes)
+5. As soon as the Pod is pending and the group exists, it starts going through the scheduling [queue and process](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/) and hits the fluence-scheduler endpoints
+   - The fluence-scheduler uses the PodGroup name to associate each individual pod with a group and start time, allowing them to be sorted together
+   - Starting times are based on microseconds so that group creation times stay distinct, even when groups are created en masse
+   - Pods that don't yet have a group (if there is a delay in the reconciler making one) are pushed off from scheduling until they do.
+6. Fluxion is queried via a GRPC endpoint, asking for a match for the job specification and an allocation -- "MatchAllocate"
+7. The pods are then scheduled together, and the abstraction (e.g., Job) created in the Kubernetes cluster
+   - When the top level abstraction cleans up and the PodGroup size is equal to the number of pods finished or failed, the PodGroup cleans up
 
 The result is (hopefully) a smooth and efficient scheduling experience. We are still working on it.
 
diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go
index 5061ac1..73b7d2d 100644
--- a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go
+++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go
@@ -180,7 +180,7 @@ func (r *PodGroupReconciler) updateStatus(
 	switch pg.Status.Phase {
 	case "":
 		pg.Status.Phase = schedv1alpha1.PodGroupPending
-		result, err := r.updateOwnerReferences(ctx, pg, &pods[0])
+		result, err := r.updateOwnerReferences(ctx, pg, pods)
 		if result.Requeue || err != nil {
 			return result, err
 		}
@@ -188,7 +188,7 @@ func (r *PodGroupReconciler) updateStatus(
 	case schedv1alpha1.PodGroupPending:
 		if len(pods) >= int(pg.Spec.MinMember) {
 			pg.Status.Phase = schedv1alpha1.PodGroupScheduling
-			result, err := r.updateOwnerReferences(ctx, pg, &pods[0])
+			result, err := r.updateOwnerReferences(ctx, pg, pods)
 			if result.Requeue || err != nil {
 				return result, err
 			}
@@ -349,12 +349,21 @@ func getCurrentPodStats(pods []v1.Pod) (int32, int32, int32) {
 func (r *PodGroupReconciler) updateOwnerReferences(
 	ctx context.Context,
 	pg *schedv1alpha1.PodGroup,
-	pod *v1.Pod,
+	pods []v1.Pod,
 ) (ctrl.Result, error) {
 
+	// We will want to re-queue in most cases
+	result := ctrl.Result{Requeue: true}
+
+	// No pods, just ignore
+	if len(pods) == 0 {
+		return result, nil
+	}
+	pod := pods[0]
+
 	// Case 1: The pod itself doesn't have owner references. YOLO
 	if len(pod.OwnerReferences) == 0 {
-		return ctrl.Result{}, nil
+		return result, nil
 	}
 
 	// Collect owner references for pod group
diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go
index 8cdc066..f126db6 100644
--- a/sig-scheduler-plugins/pkg/fluence/fluence.go
+++ b/sig-scheduler-plugins/pkg/fluence/fluence.go
@@ -166,34 +166,29 @@ func (f *Fluence) PreFilter(
 	// a user defined group. This is a size 1 group we handle equivalently.
 	groupName, pg := f.pgMgr.GetPodGroup(ctx, pod)
 
-	// Not scheduled by fluence - we have no idea about groups or sizes, just ask for one
+	// If we don't have a pod group and it's here, it was asked to be scheduled by fluence
+	// but the group isn't ready. Unschedulable for now.
 	if pg == nil {
-		klog.Infof("[Fluence] Unknown request to schedule %s yet, asking Fluxion for one node", pod.Name)
-		pg = fgroup.CreateFakeGroup(pod)
-		err := f.AskFlux(ctx, pod, pg, pg.Name)
+		klog.Infof("[Fluence] Group %s/%s does not have a pod group, not schedulable yet.", pod.Namespace, pod.Name)
+		return nil, framework.NewStatus(framework.Unschedulable, "Missing podgroup")
+	}
+	klog.Infof("[Fluence] Pod %s is in group %s with minimum members %d", pod.Name, groupName, pg.Spec.MinMember)
+
+	// Has this podgroup been seen by fluence yet? If yes, we will have it in the cache
+	cache := fcore.GetFluenceCache(groupName)
+	klog.Infof("[Fluence] cache %s", cache)
+
+	// Fluence has never seen this before, we need to schedule an allocation
+	// It also could have been seen, but was not able to get one.
+	if cache == nil {
+		klog.Infof("[Fluence] Does not have nodes for %s yet, asking Fluxion", groupName)
+
+		// groupName is the namespaced name (namespace/name)
+		err := f.AskFlux(ctx, pod, pg, groupName)
 		if err != nil {
 			klog.Infof("[Fluence] Fluxion returned an error %s, not schedulable", err.Error())
 			return nil, framework.NewStatus(framework.Unschedulable, err.Error())
 		}
-	} else {
-		klog.Infof("[Fluence] Pod %s is in group %s with minimum members %d", pod.Name, groupName, pg.Spec.MinMember)
-
-		// Has this podgroup been seen by fluence yet? If yes, we will have it in the cache
-		cache := fcore.GetFluenceCache(groupName)
-		klog.Infof("[Fluence] cache %s", cache)
-
-		// Fluence has never seen this before, we need to schedule an allocation
-		// It also could have been seen, but was not able to get one.
-		if cache == nil {
-			klog.Infof("[Fluence] Does not have nodes for %s yet, asking Fluxion", groupName)
-
-			// groupName is the namespaced name (namespace/name)
-			err := f.AskFlux(ctx, pod, pg, groupName)
-			if err != nil {
-				klog.Infof("[Fluence] Fluxion returned an error %s, not schedulable", err.Error())
-				return nil, framework.NewStatus(framework.Unschedulable, err.Error())
-			}
-		}
 	}
 
 	// This is the next node in the list
diff --git a/sig-scheduler-plugins/pkg/fluence/utils/utils.go b/sig-scheduler-plugins/pkg/fluence/utils/utils.go
index f2969d2..f24f6d4 100644
--- a/sig-scheduler-plugins/pkg/fluence/utils/utils.go
+++ b/sig-scheduler-plugins/pkg/fluence/utils/utils.go
@@ -17,7 +17,6 @@ limitations under the License.
 package utils
 
 import (
-	"fmt"
 	"strings"
 
 	v1 "k8s.io/api/core/v1"
@@ -53,11 +52,8 @@ func PreparePodJobSpec(pod *v1.Pod, groupName string) *pb.PodSpec {
 	// the check back if there is
 	ps.Labels = getPodJobspecLabels(pod)
 
-	// Note that Container gets use for the JobSpec, so we provide
-	// the pod name (to be associated with tasks) for it. We are making
-	// the assumption that this one container represents the group,
-	// which is OK for now, but might not always be true!
-	ps.Container = fmt.Sprintf("%s-%s", pod.Namespace, pod.Name)
+	// the jobname should be the group name
+	ps.Container = groupName
 
 	// Create accumulated requests for cpu and limits
 	// CPU and memory are summed across containers
From 10379356d614aa9d55271403eb586f6d3177a971 Mon Sep 17 00:00:00 2001
From: vsoch
Date: Mon, 19 Feb 2024 14:49:49 -0700
Subject: [PATCH 20/28] testing: gke then eks

I am making small changes as I test on GKE and EKS.

My first tests on GKE had me creating and deleting jobs, and I think the
state of fluence (fluxion) got out of sync with the jobs, meaning that
fluxion thought jobs were running that were not, and then was unable to
allocate new ones. To adjust for that we can add back in the cancel
response, but this will only work given that fluence has not lost memory
of the job id. We likely need an approach that can either save the jobids
to the state data (so they could be reloaded), or a way to inspect jobs
explicitly and purge, OR (better) a way to look up a job not based on the
id, but based on the group id (the command in the jobspec). That way,
regardless of a jobid, we could lose all of our state and still find the
old (stale) job to delete.

With a fresh state and a larger cluster I am able to run jobs on GKE, but
they are enormously slow - lammps size 2 2 2 is taking over 20 minutes.
This is not the fault of fluence - GKE networking sucks. To keep debugging
I likely need to move over to AWS with EFA, and of course that introduces
more things to figure out (EFA setup, etc.).

Signed-off-by: vsoch
---
 sig-scheduler-plugins/pkg/fluence/events.go  |  3 +++
 sig-scheduler-plugins/pkg/fluence/fluence.go | 15 +++++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/sig-scheduler-plugins/pkg/fluence/events.go b/sig-scheduler-plugins/pkg/fluence/events.go
index 395517a..b891713 100644
--- a/sig-scheduler-plugins/pkg/fluence/events.go
+++ b/sig-scheduler-plugins/pkg/fluence/events.go
@@ -22,6 +22,9 @@ import (
 // We assume that the cancelled job also means deleting the pod group
 func (f *Fluence) cancelFluxJob(groupName string) error {
 
+	// TODO: it's a bit risky to store state here, because if the scheduler
+	// restarts we cannot look up the jobid, and then cannot cancel it.
+	// There is no way to request cancelling the job for a specific group
 	jobid, ok := f.groupToJobId[groupName]
 
 	// The job was already cancelled by another pod
diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go
index f126db6..33976ae 100644
--- a/sig-scheduler-plugins/pkg/fluence/fluence.go
+++ b/sig-scheduler-plugins/pkg/fluence/fluence.go
@@ -255,11 +255,18 @@ func (f *Fluence) AskFlux(
 	_, isAllocated := f.groupToJobId[groupName]
 	f.mutex.Unlock()
 
-	// Not allowing cancel for now - not sure how or why we could do this, need to better
-	// understand the case. This function should ONLY be successful on a new match allocate,
-	// otherwise the calling logic does not make sense.
+	// This case happens when an initial job's pods were partially allocated,
+	// but then the job restarted, and new pods are present but fluence had assigned nodes to
+	// the old ones (and there aren't enough). The job would have had to complete in some way,
+	// and the PodGroup would have to then recreate, and have the same job id (the group name).
+	// This happened when I cancelled a bunch of jobs and they didn't have the chance to
+	// cancel in fluence. What we can do here is assume the previous pods are no longer running
+	// and cancel the flux job to create again.
 	if isAllocated {
-		return fmt.Errorf("[Fluence] Pod %s in group %s is allocated and calling AskFlux, should we be here?\n", pod.Name, groupName)
+		klog.Infof("Warning - group %s was previously allocated and is requesting again, so must have completed.", groupName)
+		f.mutex.Lock()
+		f.cancelFluxJob(groupName)
+		f.mutex.Unlock()
 	}
 
 	// IMPORTANT: this is a JobSpec for *one* pod, assuming they are all the same.
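The commit message above floats two ways to survive losing the in-memory group-to-jobid mapping on a scheduler restart: save the jobids to reloadable state, or look a job up by its group id instead. The following is a minimal, illustrative sketch of the first option only; it is not part of the patch series, and the map type (map[string]uint64), package name, and state file path are assumptions for the example rather than actual fluence definitions.

```go
// Hypothetical helpers for persisting fluence's group -> jobid map so that a
// scheduler restart can reload it and still cancel stale Fluxion jobs.
package state

import (
	"encoding/json"
	"os"
)

// stateFile is an assumed location; fluence does not define one today.
const stateFile = "/var/lib/fluence/jobids.json"

// SaveJobIds writes the current group -> jobid mapping to disk.
func SaveJobIds(groupToJobId map[string]uint64) error {
	data, err := json.Marshal(groupToJobId)
	if err != nil {
		return err
	}
	return os.WriteFile(stateFile, data, 0o600)
}

// LoadJobIds reads the mapping back after a restart; a missing file simply
// means there is no previous state to recover.
func LoadJobIds() (map[string]uint64, error) {
	groupToJobId := map[string]uint64{}
	data, err := os.ReadFile(stateFile)
	if os.IsNotExist(err) {
		return groupToJobId, nil
	}
	if err != nil {
		return nil, err
	}
	err = json.Unmarshal(data, &groupToJobId)
	return groupToJobId, err
}
```

The second option in the message (looking up a job by its group id on the Fluxion side) would avoid this bookkeeping entirely, which is why it is called out there as the better approach.
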
From f52e2092a8d15fb8ee32a8d270130897d88c0769 Mon Sep 17 00:00:00 2001
From: vsoch
Date: Thu, 7 Mar 2024 01:33:45 -0700
Subject: [PATCH 21/28] refactor: testing idea to wrap coscheduling

This is the "skeleton" of a new idea to wrap coscheduling, adding in the
logic for fluence only where it is needed, likely in the PodGroup (in the
new fluence/core/core that wraps the same in coscheduling). This is just
a skeleton because we are deploying the sidecar with the wrapped
scheduling and absolutely no logic ported over to AskFlux. I think I have
a sense of where to put this, but wanted to save this vanilla/skeleton
state in case we need to go back to it.

Note that it did not work to have fluence inherit the functions from
coscheduler, so I opted for a strategy of adding it as a helper field,
and then just using it when necessary.

Signed-off-by: vsoch
---
 README.md                                    |   6 +
 examples/pod-group/lammps/lammps2.yaml       |   4 +-
 examples/pod-group/lammps/lammps4-2.yaml     |   4 +-
 examples/pod-group/lammps/lammps4-3.yaml     |   4 +-
 examples/pod-group/lammps/lammps4.yaml       |   4 +-
 examples/pod-group/lammps/lammps5.yaml       |   4 +-
 examples/pod-group/lammps/lammps6.yaml       |   4 +-
 examples/test_example/fluence-sized-job.yaml |   2 +-
 sig-scheduler-plugins/cmd/scheduler/main.go  |   5 +-
 .../pkg/controllers/podgroup_controller.go   |  46 +-
 sig-scheduler-plugins/pkg/fluence/README.md  |  29 --
 .../pkg/fluence/core/core.go                 | 392 ++++++++++++-----
 sig-scheduler-plugins/pkg/fluence/events.go  | 166 -------
 sig-scheduler-plugins/pkg/fluence/fluence.go | 414 ++++++++----------
 .../pkg/fluence/labels/labels.go             |  40 +-
 15 files changed, 556 insertions(+), 568 deletions(-)
 delete mode 100644 sig-scheduler-plugins/pkg/fluence/README.md
 delete mode 100644 sig-scheduler-plugins/pkg/fluence/events.go

diff --git a/README.md b/README.md
index ae420fd..89f2a18 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,12 @@ Fluence enables HPC-grade pod scheduling in Kubernetes via the [Kubernetes Sched
 
 **Important** Fluence does not currently support use in conjunction with the kube-scheduler. Pods must all be scheduled by Fluence, and *you should not use both schedulers in the same cluster*.
 
+## TODO
+
+- Need to list pods, get their state, and if the group is completed, cancel the job id.
+- Keep track of the state of all pods in a group; when all of the pods are completed, then issue the cancel.
+- Calculate on the fly: on the update event we want to loop through the pods, and if ALL are completed, then delete the podid for fluence.
+
 ## Getting started
 
 For instructions on how to start Fluence on a K8s cluster, see [examples](examples/). Documentation and instructions for reproducing our CANOPIE-2022 paper (citation below) can be found in the [canopie22-artifacts branch](https://github.com/flux-framework/flux-k8s/tree/canopie22-artifacts).
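The TODO list added to the README above describes issuing a cancel to fluxion once every pod in a group has completed. Below is a minimal sketch of that check, outside of the patch itself, assuming the caller already has the group's pods in hand and a cancel helper along the lines of cancelFluxJob from the previous patch; the function names and the Fluence receiver are illustrative, not the final fluence API.

```go
// Illustrative only: decide whether a pod group is finished and, if so,
// cancel the corresponding Fluxion job for the group.
package fluence

import (
	v1 "k8s.io/api/core/v1"
)

// groupIsComplete returns true when every pod in the group has reached a
// terminal phase (Succeeded or Failed).
func groupIsComplete(pods []v1.Pod) bool {
	if len(pods) == 0 {
		return false
	}
	for _, pod := range pods {
		if pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
			return false
		}
	}
	return true
}

// cancelIfComplete would run on a pod update event: loop through the group's
// pods and only issue the cancel (deleting the saved jobid) when all are done.
// cancelFluxJob here refers to the helper shown in the previous patch.
func (f *Fluence) cancelIfComplete(groupName string, pods []v1.Pod) error {
	if !groupIsComplete(pods) {
		return nil
	}
	return f.cancelFluxJob(groupName)
}
```
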
diff --git a/examples/pod-group/lammps/lammps2.yaml b/examples/pod-group/lammps/lammps2.yaml index 5cc7535..5a83c97 100644 --- a/examples/pod-group/lammps/lammps2.yaml +++ b/examples/pod-group/lammps/lammps2.yaml @@ -14,6 +14,6 @@ spec: command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite resources: limits: - cpu: 2 + cpu: 10 requests: - cpu: 2 \ No newline at end of file + cpu: 10 diff --git a/examples/pod-group/lammps/lammps4-2.yaml b/examples/pod-group/lammps/lammps4-2.yaml index 777e73c..6b647bc 100644 --- a/examples/pod-group/lammps/lammps4-2.yaml +++ b/examples/pod-group/lammps/lammps4-2.yaml @@ -17,6 +17,6 @@ spec: command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite resources: limits: - cpu: 2 + cpu: 10 requests: - cpu: 2 \ No newline at end of file + cpu: 10 diff --git a/examples/pod-group/lammps/lammps4-3.yaml b/examples/pod-group/lammps/lammps4-3.yaml index 76c5ed0..b182751 100644 --- a/examples/pod-group/lammps/lammps4-3.yaml +++ b/examples/pod-group/lammps/lammps4-3.yaml @@ -17,6 +17,6 @@ spec: command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite resources: limits: - cpu: 2 + cpu: 10 requests: - cpu: 2 \ No newline at end of file + cpu: 10 diff --git a/examples/pod-group/lammps/lammps4.yaml b/examples/pod-group/lammps/lammps4.yaml index 38ae0a7..9420902 100644 --- a/examples/pod-group/lammps/lammps4.yaml +++ b/examples/pod-group/lammps/lammps4.yaml @@ -18,6 +18,6 @@ spec: command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite resources: limits: - cpu: 2 + cpu: 10 requests: - cpu: 2 \ No newline at end of file + cpu: 10 diff --git a/examples/pod-group/lammps/lammps5.yaml b/examples/pod-group/lammps/lammps5.yaml index 7546b48..e85299f 100644 --- a/examples/pod-group/lammps/lammps5.yaml +++ b/examples/pod-group/lammps/lammps5.yaml @@ -17,6 +17,6 @@ spec: command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite resources: limits: - cpu: 2 + cpu: 10 requests: - cpu: 2 \ No newline at end of file + cpu: 10 diff --git a/examples/pod-group/lammps/lammps6.yaml b/examples/pod-group/lammps/lammps6.yaml index 2030192..14ebae3 100644 --- a/examples/pod-group/lammps/lammps6.yaml +++ b/examples/pod-group/lammps/lammps6.yaml @@ -17,6 +17,6 @@ spec: command: lmp -v x 1 -v y 1 -v z 1 -in in.reaxc.hns -nocite resources: limits: - cpu: 2 + cpu: 10 requests: - cpu: 2 \ No newline at end of file + cpu: 10 diff --git a/examples/test_example/fluence-sized-job.yaml b/examples/test_example/fluence-sized-job.yaml index a195d87..d1e7556 100644 --- a/examples/test_example/fluence-sized-job.yaml +++ b/examples/test_example/fluence-sized-job.yaml @@ -11,6 +11,6 @@ spec: containers: - name: fluence-job image: busybox - command: [echo, potato] + command: [sleep, "20"] restartPolicy: Never backoffLimit: 4 diff --git a/sig-scheduler-plugins/cmd/scheduler/main.go b/sig-scheduler-plugins/cmd/scheduler/main.go index d9a580a..2b21d28 100644 --- a/sig-scheduler-plugins/cmd/scheduler/main.go +++ b/sig-scheduler-plugins/cmd/scheduler/main.go @@ -26,6 +26,7 @@ import ( "sigs.k8s.io/scheduler-plugins/pkg/capacityscheduling" "sigs.k8s.io/scheduler-plugins/pkg/coscheduling" + "sigs.k8s.io/scheduler-plugins/pkg/fluence" "sigs.k8s.io/scheduler-plugins/pkg/networkaware/networkoverhead" "sigs.k8s.io/scheduler-plugins/pkg/networkaware/topologicalsort" "sigs.k8s.io/scheduler-plugins/pkg/noderesources" @@ -36,7 +37,7 @@ import ( "sigs.k8s.io/scheduler-plugins/pkg/trimaran/loadvariationriskbalancing" "sigs.k8s.io/scheduler-plugins/pkg/trimaran/lowriskovercommitment" 
"sigs.k8s.io/scheduler-plugins/pkg/trimaran/targetloadpacking" - "sigs.k8s.io/scheduler-plugins/pkg/fluence" + // Ensure scheme package is initialized. _ "sigs.k8s.io/scheduler-plugins/apis/config/scheme" ) @@ -56,8 +57,6 @@ func main() { app.WithPlugin(preemptiontoleration.Name, preemptiontoleration.New), app.WithPlugin(targetloadpacking.Name, targetloadpacking.New), app.WithPlugin(lowriskovercommitment.Name, lowriskovercommitment.New), - // Sample plugins below. - // app.WithPlugin(crossnodepreemption.Name, crossnodepreemption.New), app.WithPlugin(podstate.Name, podstate.New), app.WithPlugin(qos.Name, qos.New), app.WithPlugin(fluence.Name, fluence.New), diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go index 73b7d2d..27c31cb 100644 --- a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go +++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go @@ -58,10 +58,8 @@ type PodGroupReconciler struct { // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the PodGroup object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. +// Note that we currently don't do deletion based on owner references, but that +// would be ideal (I could not get it to work) // // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile @@ -82,6 +80,7 @@ func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c log.Error(err, fmt.Sprintf("Unable to retrieve pod group %s", req.NamespacedName)) return ctrl.Result{}, err } + log.Info("REFERENCES", "Reconciler", pg.ObjectMeta.OwnerReferences) // Grab all statuses (and groups of them) we are interested in schedulingOrPending := (pg.Status.Phase == schedv1alpha1.PodGroupScheduling || pg.Status.Phase == schedv1alpha1.PodGroupPending) @@ -175,35 +174,32 @@ func (r *PodGroupReconciler) updateStatus( pods []v1.Pod, ) (ctrl.Result, error) { + log := log.FromContext(ctx) patch := client.MergeFrom(pg.DeepCopy()) + log.Info("PodGroup", "Phase", pg.Status.Phase) switch pg.Status.Phase { case "": pg.Status.Phase = schedv1alpha1.PodGroupPending - result, err := r.updateOwnerReferences(ctx, pg, pods) - if result.Requeue || err != nil { - return result, err - } case schedv1alpha1.PodGroupPending: if len(pods) >= int(pg.Spec.MinMember) { + log.Info("PodGroup", "Phase", "Scheduling") pg.Status.Phase = schedv1alpha1.PodGroupScheduling - result, err := r.updateOwnerReferences(ctx, pg, pods) - if result.Requeue || err != nil { - return result, err - } } default: - // Get updated counts of running, succeeded, and failed pods - running, succeeded, failed := getCurrentPodStats(pods) - // If for some reason we weren't pending and now have fewer than min required, flip back to pending if len(pods) < int(pg.Spec.MinMember) { + log.Info("PodGroup", "Phase", "Length of pods less than min member, pending") pg.Status.Phase = schedv1alpha1.PodGroupPending break } + // Get updated counts of running, succeeded, and failed pods + running, succeeded, failed := getCurrentPodStats(pods) + log.Info("PodGroup", "Running", running, "Succeeded", succeeded, "Failed", failed) + // A pod with succeeded + running STILL less than the minimum required is 
scheduling if succeeded+running < pg.Spec.MinMember { pg.Status.Phase = schedv1alpha1.PodGroupScheduling @@ -232,16 +228,18 @@ func (r *PodGroupReconciler) updateStatus( } // Apply the patch to update, or delete if finished - // TODO would be better if owner references took here, so delete on owner deletion - // TODO deletion is not currently handled for Deployment, ReplicaSet, StatefulSet - // as they are expected to persist. You can delete / lose and bring up again var err error if pg.Status.Phase == schedv1alpha1.PodGroupFinished || pg.Status.Phase == schedv1alpha1.PodGroupFailed { + log.Info("PodGroup", "Status", "Finished", "Owners", pg.OwnerReferences) + + // Delete the group if it is finished or failed err = r.Delete(ctx, pg) - } else { - r.Status().Update(ctx, pg) - err = r.Patch(ctx, pg, patch) + // Update but don't requeue + // _, err := r.updateOwnerReferences(ctx, pg, pods) + return ctrl.Result{}, err } + r.Status().Update(ctx, pg) + err = r.Patch(ctx, pg, patch) return ctrl.Result{Requeue: true}, err } @@ -366,21 +364,25 @@ func (r *PodGroupReconciler) updateOwnerReferences( return result, nil } - // Collect owner references for pod group + // Collect current owner references for pod group, + // We want to ensure we add unique ones across the pod owners := []metav1.OwnerReference{} var refs []string for _, ownerRef := range pod.OwnerReferences { refs = append(refs, fmt.Sprintf("%s/%s", pod.Namespace, ownerRef.Name)) owners = append(owners, ownerRef) } + patch := client.MergeFrom(pg.DeepCopy()) if len(refs) != 0 { sort.Strings(refs) pg.Status.OccupiedBy = strings.Join(refs, ",") } + // If we have owners, collapose into list if len(owners) > 0 { pg.ObjectMeta.OwnerReferences = owners } + // Apply the patch to update the size r.Status().Update(ctx, pg) err := r.Patch(ctx, pg, patch) diff --git a/sig-scheduler-plugins/pkg/fluence/README.md b/sig-scheduler-plugins/pkg/fluence/README.md deleted file mode 100644 index 61f4923..0000000 --- a/sig-scheduler-plugins/pkg/fluence/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Overview - -Project to manage Flux tasks needed to standardize kubernetes HPC scheduling interfaces - -## Installing the chart - -More detail will be added here about installing the chart. You will -be using the [install-as-a-second-scheduler](https://github.com/kubernetes-sigs/scheduler-plugins/tree/master/manifests/install/charts/as-a-second-scheduler) -charts. Fluence-specific values are detailed below. - -### Fluence specific values - -In `values.yaml` it is possible to customize the container image, already defaulted to the latest release, and the allocation policy -used by the scheduler. -Most common options are: - -- `lonode`: choose the nodes with lower ID first. Can be compared to packing -- `low`: choose cores with lowest IDs from multiple nodes. Can be compared to spread process-to-resource placement - -## Maturity Level - - - -- [x] Sample (for demonstrating and inspiring purpose) -- [ ] Alpha (used in companies for pilot projects) -- [ ] Beta (used in companies and developed actively) -- [ ] Stable (used in companies for production workloads) - - diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index a3f4531..efa1127 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -1,161 +1,329 @@ +/* +Copyright 2020 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package core import ( + "context" "fmt" + "sync" + "time" - klog "k8s.io/klog/v2" - + gochache "github.com/patrickmn/go-cache" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + informerv1 "k8s.io/client-go/informers/core/v1" + listerv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" - pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" + "sigs.k8s.io/controller-runtime/pkg/client" + + "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" + "sigs.k8s.io/scheduler-plugins/pkg/util" ) -// FluxStateData is a CycleState -// It holds the PodCache for a pod, which has node assignment, group, and group size -// We also save the group name and size, and time created, in case we want to (somehow) resume scheduling -// In practice I'm not sure how CycleState objects are dumped and loaded. Kueue has a dumper :P -// https://github.com/kubernetes/enhancements/blob/master/keps/sig-scheduling/624-scheduling-framework/README.md#cyclestate -type FluxStateData struct { - NodeCache NodeCache +type Status string + +const ( + // PodGroupNotSpecified denotes no PodGroup is specified in the Pod spec. + PodGroupNotSpecified Status = "PodGroup not specified" + // PodGroupNotFound denotes the specified PodGroup in the Pod spec is + // not found in API server. + PodGroupNotFound Status = "PodGroup not found" + Success Status = "Success" + Wait Status = "Wait" +) + +// Manager defines the interfaces for PodGroup management. +type Manager interface { + PreFilter(context.Context, *corev1.Pod) error + Permit(context.Context, *corev1.Pod) Status + GetPodGroup(context.Context, *corev1.Pod) (string, *v1alpha1.PodGroup) + GetCreationTimestamp(*corev1.Pod, time.Time) time.Time + DeletePermittedPodGroup(string) + CalculateAssignedPods(string, string) int + ActivateSiblings(pod *corev1.Pod, state *framework.CycleState) + BackoffPodGroup(string, time.Duration) } -// Clone is required for CycleState plugins -func (s *FluxStateData) Clone() framework.StateData { - return &FluxStateData{NodeCache: s.NodeCache} +// PodGroupManager defines the scheduling operation called +type PodGroupManager struct { + // client is a generic controller-runtime client to manipulate both core resources and PodGroups. + client client.Client + // snapshotSharedLister is pod shared list + snapshotSharedLister framework.SharedLister + // scheduleTimeout is the default timeout for podgroup scheduling. + // If podgroup's scheduleTimeoutSeconds is set, it will be used. + scheduleTimeout *time.Duration + // permittedPG stores the podgroup name which has passed the pre resource check. + permittedPG *gochache.Cache + // backedOffPG stores the podgorup name which failed scheudling recently. 
+ backedOffPG *gochache.Cache + // podLister is pod lister + podLister listerv1.PodLister + sync.RWMutex } -// NewFluxState creates an entry for the CycleState with the node and group name -func NewFluxState(nodeName string, groupName string) *FluxStateData { - cache := NodeCache{NodeName: nodeName} - return &FluxStateData{NodeCache: cache} +// NewPodGroupManager creates a new operation object. +func NewPodGroupManager(client client.Client, snapshotSharedLister framework.SharedLister, scheduleTimeout *time.Duration, podInformer informerv1.PodInformer) *PodGroupManager { + pgMgr := &PodGroupManager{ + client: client, + snapshotSharedLister: snapshotSharedLister, + scheduleTimeout: scheduleTimeout, + podLister: podInformer.Lister(), + permittedPG: gochache.New(3*time.Second, 3*time.Second), + backedOffPG: gochache.New(10*time.Second, 10*time.Second), + } + return pgMgr } -// NodeCache holds the node name and tasks for the node -// For the PodGroupCache, these are organized by group name, -// and there is a list of them -type NodeCache struct { - NodeName string +func (pgMgr *PodGroupManager) BackoffPodGroup(pgName string, backoff time.Duration) { + if backoff == time.Duration(0) { + return + } + pgMgr.backedOffPG.Add(pgName, nil, backoff) +} - // Tie assignment back to PodGroup, which can be used to get size and time created - GroupName string +// ActivateSiblings stashes the pods belonging to the same PodGroup of the given pod +// in the given state, with a reserved key "kubernetes.io/pods-to-activate". +func (pgMgr *PodGroupManager) ActivateSiblings(pod *corev1.Pod, state *framework.CycleState) { + pgName := util.GetPodGroupLabel(pod) + if pgName == "" { + return + } - // Assigned tasks (often pods) to nodes - // https://github.com/flux-framework/flux-k8s/blob/9f24f36752e3cced1b1112d93bfa366fb58b3c84/src/fluence/fluxion/fluxion.go#L94-L97 - AssignedTasks int -} + pods, err := pgMgr.podLister.Pods(pod.Namespace).List( + labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: pgName}), + ) + if err != nil { + klog.ErrorS(err, "Failed to obtain pods belong to a PodGroup", "podGroup", pgName) + return + } -// A pod group cache holds a list of nodes for an allocation, where each has some number of tasks -// along with the expected group size. This is intended to replace PodGroup -// given the group name, size (derived from annotations) and timestamp -type PodGroupCache struct { - GroupName string + for i := range pods { + if pods[i].UID == pod.UID { + pods = append(pods[:i], pods[i+1:]...) + break + } + } - // This is a cache of nodes for pods - Nodes []NodeCache + if len(pods) != 0 { + if c, err := state.Read(framework.PodsToActivateKey); err == nil { + if s, ok := c.(*framework.PodsToActivate); ok { + s.Lock() + for _, pod := range pods { + namespacedName := GetNamespacedName(pod) + s.Map[namespacedName] = pod + } + s.Unlock() + } + } + } } -// PodGroups seen by fluence -var groupsSeen map[string]*PodGroupCache +// PreFilter filters out a pod if +// 1. it belongs to a podgroup that was recently denied or +// 2. the total number of pods in the podgroup is less than the minimum number of pods +// that is required to be scheduled. 
+func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, pod *corev1.Pod) error { + klog.V(5).InfoS("Pre-filter", "pod", klog.KObj(pod)) + pgFullName, pg := pgMgr.GetPodGroup(ctx, pod) + if pg == nil { + return nil + } -// Init populates the groupsSeen cache -func Init() { - groupsSeen = map[string]*PodGroupCache{} -} + if _, exist := pgMgr.backedOffPG.Get(pgFullName); exist { + return fmt.Errorf("podGroup %v failed recently", pgFullName) + } -// GetFluenceCache determines if a group has been seen. -// Yes -> we return the PodGroupCache entry -// No -> the entry is nil / does not exist -func GetFluenceCache(groupName string) *PodGroupCache { - entry, _ := groupsSeen[groupName] - return entry -} + pods, err := pgMgr.podLister.Pods(pod.Namespace).List( + labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: util.GetPodGroupLabel(pod)}), + ) + if err != nil { + return fmt.Errorf("podLister list pods failed: %w", err) + } -// DeletePodGroup deletes a pod from the group cache -func DeletePodGroup(groupName string) { - delete(groupsSeen, groupName) -} + if len(pods) < int(pg.Spec.MinMember) { + return fmt.Errorf("pre-filter pod %v cannot find enough sibling pods, "+ + "current pods number: %v, minMember of group: %v", pod.Name, len(pods), pg.Spec.MinMember) + } -// CreateNodePodsList creates a list of node pod caches -func CreateNodeList(nodelist []*pb.NodeAlloc, groupName string) (nodepods []NodeCache) { + if pg.Spec.MinResources == nil { + return nil + } - // Create a pod cache for each node - nodepods = make([]NodeCache, len(nodelist)) + // TODO(cwdsuzhou): This resource check may not always pre-catch unschedulable pod group. + // It only tries to PreFilter resource constraints so even if a PodGroup passed here, + // it may not necessarily pass Filter due to other constraints such as affinity/taints. + if _, ok := pgMgr.permittedPG.Get(pgFullName); ok { + return nil + } - // TODO: should we be integrating topology information here? Could it be the - // case that some nodes (pods) in the group should be closer? - for i, v := range nodelist { - nodepods[i] = NodeCache{ - NodeName: v.GetNodeID(), - AssignedTasks: int(v.GetTasks()), - GroupName: groupName, - } + nodes, err := pgMgr.snapshotSharedLister.NodeInfos().List() + if err != nil { + return err } - // Update the pods in the PodGroupCache (groupsSeen) - updatePodGroupCache(groupName, nodepods) - return nodepods + minResources := pg.Spec.MinResources.DeepCopy() + podQuantity := resource.NewQuantity(int64(pg.Spec.MinMember), resource.DecimalSI) + minResources[corev1.ResourcePods] = *podQuantity + err = CheckClusterResource(nodes, minResources, pgFullName) + if err != nil { + klog.ErrorS(err, "Failed to PreFilter", "podGroup", klog.KObj(pg)) + return err + } + pgMgr.permittedPG.Add(pgFullName, pgFullName, *pgMgr.scheduleTimeout) + return nil } -// updatePodGroupList updates the PodGroupCache with a listing of nodes -func updatePodGroupCache(groupName string, nodes []NodeCache) { - cache := PodGroupCache{ - Nodes: nodes, - GroupName: groupName, +// Permit permits a pod to run, if the minMember match, it would send a signal to chan. +func (pgMgr *PodGroupManager) Permit(ctx context.Context, pod *corev1.Pod) Status { + pgFullName, pg := pgMgr.GetPodGroup(ctx, pod) + if pgFullName == "" { + return PodGroupNotSpecified + } + if pg == nil { + // A Pod with a podGroup name but without a PodGroup found is denied. 
+ return PodGroupNotFound + } + + assigned := pgMgr.CalculateAssignedPods(pg.Name, pg.Namespace) + // The number of pods that have been assigned nodes is calculated from the snapshot. + // The current pod in not included in the snapshot during the current scheduling cycle. + if int32(assigned)+1 >= pg.Spec.MinMember { + return Success } - groupsSeen[groupName] = &cache + return Wait } -// GetNextNode gets the next node in the PodGroupCache -func (p *PodGroupCache) GetNextNode() (string, error) { +// GetCreationTimestamp returns the creation time of a podGroup or a pod. +func (pgMgr *PodGroupManager) GetCreationTimestamp(pod *corev1.Pod, ts time.Time) time.Time { + pgName := util.GetPodGroupLabel(pod) + if len(pgName) == 0 { + return ts + } + var pg v1alpha1.PodGroup + if err := pgMgr.client.Get(context.TODO(), types.NamespacedName{Namespace: pod.Namespace, Name: pgName}, &pg); err != nil { + return ts + } + return pg.CreationTimestamp.Time +} - nextnode := "" +// DeletePermittedPodGroup deletes a podGroup that passes Pre-Filter but reaches PostFilter. +func (pgMgr *PodGroupManager) DeletePermittedPodGroup(pgFullName string) { + pgMgr.permittedPG.Delete(pgFullName) +} - // Quick failure state - we ran out of nodes - if len(p.Nodes) == 0 { - return nextnode, fmt.Errorf("[Fluence] PodGroup %s ran out of nodes.", p.GroupName) +// GetPodGroup returns the PodGroup that a Pod belongs to in cache. +func (pgMgr *PodGroupManager) GetPodGroup(ctx context.Context, pod *corev1.Pod) (string, *v1alpha1.PodGroup) { + pgName := util.GetPodGroupLabel(pod) + if len(pgName) == 0 { + return "", nil + } + var pg v1alpha1.PodGroup + if err := pgMgr.client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: pgName}, &pg); err != nil { + return fmt.Sprintf("%v/%v", pod.Namespace, pgName), nil } + return fmt.Sprintf("%v/%v", pod.Namespace, pgName), &pg +} - // The next is the 0th in the list - nextnode = p.Nodes[0].NodeName - klog.Infof("[Fluence] Next node for group %s is %s", p.GroupName, nextnode) +// CalculateAssignedPods returns the number of pods that has been assigned nodes: assumed or bound. +func (pgMgr *PodGroupManager) CalculateAssignedPods(podGroupName, namespace string) int { + nodeInfos, err := pgMgr.snapshotSharedLister.NodeInfos().List() + klog.Info(nodeInfos) + if err != nil { + klog.ErrorS(err, "Cannot get nodeInfos from frameworkHandle") + return 0 + } + var count int + for _, nodeInfo := range nodeInfos { + for _, podInfo := range nodeInfo.Pods { + pod := podInfo.Pod + if util.GetPodGroupLabel(pod) == podGroupName && pod.Namespace == namespace && pod.Spec.NodeName != "" { + count++ + } + } + } - // If there is only one task left, we are going to use it (and remove the node) - if p.Nodes[0].AssignedTasks == 1 { - klog.Infof("[Fluence] First node has one remaining task slot") - slice := p.Nodes[1:] + return count +} - // If after we remove the node there are no nodes left... - // Note that I'm not deleting the node from the cache because that is the - // only way fluence knows it has already assigned work (presence of the key) - if len(slice) == 0 { - klog.Infof("[Fluence] Assigning node %s. There are NO reamining nodes for group %s\n", nextnode, p.GroupName) - // delete(podGroupCache, groupName) - return nextnode, nil +// CheckClusterResource checks if resource capacity of the cluster can satisfy . +// It returns an error detailing the resource gap if not satisfied; otherwise returns nil. 
+func CheckClusterResource(nodeList []*framework.NodeInfo, resourceRequest corev1.ResourceList, desiredPodGroupName string) error { + for _, info := range nodeList { + if info == nil || info.Node() == nil { + continue } - klog.Infof("[Fluence] Assigning node %s. There are nodes left for group", nextnode, p.GroupName) - updatePodGroupCache(p.GroupName, slice) - return nextnode, nil + nodeResource := util.ResourceList(getNodeResource(info, desiredPodGroupName)) + for name, quant := range resourceRequest { + quant.Sub(nodeResource[name]) + if quant.Sign() <= 0 { + delete(resourceRequest, name) + continue + } + resourceRequest[name] = quant + } + if len(resourceRequest) == 0 { + return nil + } } + return fmt.Errorf("resource gap: %v", resourceRequest) +} - // If we get here the first node had >1 assigned tasks - klog.Infof("[Fluence] Assigning node %s for group %s. There are still task assignments available for this node.", nextnode, p.GroupName) - p.Nodes[0].AssignedTasks = p.Nodes[0].AssignedTasks - 1 - return nextnode, nil +// GetNamespacedName returns the namespaced name. +func GetNamespacedName(obj metav1.Object) string { + return fmt.Sprintf("%v/%v", obj.GetNamespace(), obj.GetName()) } -// GetNextNode gets the next available node we can allocate for a group -// TODO this should be able to take and pass forward a number of tasks. -// It is implicity 1 now, but doesn't have to be. -func GetNextNode(groupName string) (string, error) { +func getNodeResource(info *framework.NodeInfo, desiredPodGroupName string) *framework.Resource { + nodeClone := info.Clone() + for _, podInfo := range info.Pods { + if podInfo == nil || podInfo.Pod == nil { + continue + } + if util.GetPodGroupFullName(podInfo.Pod) != desiredPodGroupName { + continue + } + nodeClone.RemovePod(podInfo.Pod) + } - // Get our entry from the groupsSeen cache - klog.Infof("[Fluence] groups seen %s", groupsSeen) - entry, ok := groupsSeen[groupName] + leftResource := framework.Resource{ + ScalarResources: make(map[corev1.ResourceName]int64), + } + allocatable := nodeClone.Allocatable + requested := nodeClone.Requested + + leftResource.AllowedPodNumber = allocatable.AllowedPodNumber - len(nodeClone.Pods) + leftResource.MilliCPU = allocatable.MilliCPU - requested.MilliCPU + leftResource.Memory = allocatable.Memory - requested.Memory + leftResource.EphemeralStorage = allocatable.EphemeralStorage - requested.EphemeralStorage - // This case should not happen - if !ok { - return "", fmt.Errorf("[Fluence] Map is empty") + for k, allocatableEx := range allocatable.ScalarResources { + requestEx, ok := requested.ScalarResources[k] + if !ok { + leftResource.ScalarResources[k] = allocatableEx + } else { + leftResource.ScalarResources[k] = allocatableEx - requestEx + } } - // Get the next node from the PodGroupCache - return entry.GetNextNode() + klog.V(4).InfoS("Node left resource", "node", klog.KObj(info.Node()), "resource", leftResource) + return &leftResource } diff --git a/sig-scheduler-plugins/pkg/fluence/events.go b/sig-scheduler-plugins/pkg/fluence/events.go deleted file mode 100644 index b891713..0000000 --- a/sig-scheduler-plugins/pkg/fluence/events.go +++ /dev/null @@ -1,166 +0,0 @@ -package fluence - -import ( - "context" - "time" - - "google.golang.org/grpc" - v1 "k8s.io/api/core/v1" - klog "k8s.io/klog/v2" - - pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" - fgroup "sigs.k8s.io/scheduler-plugins/pkg/fluence/group" -) - -// Events are associated with inforers, typically on pods, e.g., -// delete: deletion of a pod -// 
update: update of a pod! -// For both of the above, there are cases to cancel the flux job -// associated with the group id - -// cancelFluxJobForPod cancels the flux job for a pod. -// We assume that the cancelled job also means deleting the pod group -func (f *Fluence) cancelFluxJob(groupName string) error { - - // TODO: it's a bit risky to store state here, because if the scheduler - // restarts we cannot look up the jobid, and then cannot cancel it. - // There is no way to request cancelling the job for a specific group - jobid, ok := f.groupToJobId[groupName] - - // The job was already cancelled by another pod - if !ok { - klog.Infof("[Fluence] Request for cancel of group %s is already complete.", groupName) - return nil - } - klog.Infof("[Fluence] Cancel flux job: %v for group %s", jobid, groupName) - - // This first error is about connecting to the server - conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) - if err != nil { - klog.Errorf("[Fluence] Error connecting to server: %v", err) - return err - } - defer conn.Close() - - grpcclient := pb.NewFluxcliServiceClient(conn) - _, cancel := context.WithTimeout(context.Background(), 200*time.Second) - defer cancel() - - // This error reflects the success or failure of the cancel request - request := &pb.CancelRequest{JobID: int64(jobid)} - res, err := grpcclient.Cancel(context.Background(), request) - if err != nil { - klog.Errorf("[Fluence] did not receive any cancel response: %v", err) - return err - } - klog.Infof("[Fluence] Job cancellation for group %s result: %d", groupName, res.Error) - - // And this error is if the cancel was successful or not - if res.Error == 0 { - klog.Infof("[Fluence] Successful cancel of flux job: %d for group %s", jobid, groupName) - delete(f.groupToJobId, groupName) - } else { - klog.Warningf("[Fluence] Failed to cancel flux job %d for group %s", jobid, groupName) - } - return nil -} - -// updatePod is called on an update, and the old and new object are presented -func (f *Fluence) updatePod(oldObj, newObj interface{}) { - - oldPod := oldObj.(*v1.Pod) - newPod := newObj.(*v1.Pod) - - // a pod is updated, get the group - // TODO should we be checking group / size for old vs new? - groupName, pg := f.pgMgr.GetPodGroup(context.TODO(), oldPod) - - // If PodGroup is nil, still try to look up a faux name - if pg == nil { - pg = fgroup.CreateFakeGroup(oldPod) - groupName = pg.Name - } - - klog.Infof("[Fluence] Processing event for pod %s in group %s from %s to %s", newPod.Name, groupName, newPod.Status.Phase, oldPod.Status.Phase) - - switch newPod.Status.Phase { - case v1.PodPending: - // in this state we don't know if a pod is going to be running, thus we don't need to update job map - case v1.PodRunning: - // if a pod is start running, we can add it state to the delta graph if it is scheduled by other scheduler - case v1.PodSucceeded: - klog.Infof("[Fluence] Pod %s succeeded, Fluence needs to free the resources", newPod.Name) - - f.mutex.Lock() - defer f.mutex.Unlock() - - // Do we have the group id in our cache? 
If yes, we haven't deleted the jobid yet - // I am worried here that if some pods are succeeded and others pending, this could - // be a mistake - fluence would schedule it again - _, ok := f.groupToJobId[groupName] - if ok { - f.cancelFluxJob(groupName) - } else { - klog.Infof("[Fluence] Succeeded pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) - } - - case v1.PodFailed: - - // a corner case need to be tested, the pod exit code is not 0, can be created with segmentation fault pi test - klog.Warningf("[Fluence] Pod %s in group %s failed, Fluence needs to free the resources", newPod.Name, groupName) - - f.mutex.Lock() - defer f.mutex.Unlock() - - _, ok := f.groupToJobId[groupName] - if ok { - f.cancelFluxJob(groupName) - } else { - klog.Errorf("[Fluence] Failed pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) - } - case v1.PodUnknown: - // don't know how to deal with it as it's unknown phase - default: - // shouldn't enter this branch - } -} - -// deletePod handles the delete event handler -func (f *Fluence) deletePod(podObj interface{}) { - klog.Info("[Fluence] Delete Pod event handler") - pod := podObj.(*v1.Pod) - groupName, pg := f.pgMgr.GetPodGroup(context.TODO(), pod) - - // If PodGroup is nil, still try to look up a faux name - if pg == nil { - pg = fgroup.CreateFakeGroup(pod) - groupName = pg.Name - } - - klog.Infof("[Fluence] Delete pod %s in group %s has status %s", pod.Status.Phase, pod.Name, groupName) - switch pod.Status.Phase { - case v1.PodSucceeded: - case v1.PodPending: - klog.Infof("[Fluence] Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) - - f.mutex.Lock() - defer f.mutex.Unlock() - - _, ok := f.groupToJobId[groupName] - if ok { - f.cancelFluxJob(groupName) - } else { - klog.Infof("[Fluence] Terminating pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) - } - case v1.PodRunning: - f.mutex.Lock() - defer f.mutex.Unlock() - - _, ok := f.groupToJobId[groupName] - if ok { - f.cancelFluxJob(groupName) - } else { - klog.Infof("[Fluence] Deleted pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) - } - } -} diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 33976ae..1ad1fd3 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -1,124 +1,140 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package fluence import ( "context" "fmt" - "os" "sync" "time" - "google.golang.org/grpc" v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/informers" clientscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/cache" + + fgroup "sigs.k8s.io/scheduler-plugins/pkg/fluence/group" + label "sigs.k8s.io/scheduler-plugins/pkg/fluence/labels" + corev1helpers "k8s.io/component-helpers/scheduling/corev1" - klog "k8s.io/klog/v2" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" - "sigs.k8s.io/controller-runtime/pkg/client" - sched "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" - coschedulingcore "sigs.k8s.io/scheduler-plugins/pkg/coscheduling/core" + "sigs.k8s.io/scheduler-plugins/pkg/util" + + "sigs.k8s.io/scheduler-plugins/apis/config" + "sigs.k8s.io/scheduler-plugins/apis/scheduling" + "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" fcore "sigs.k8s.io/scheduler-plugins/pkg/fluence/core" - pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" - fgroup "sigs.k8s.io/scheduler-plugins/pkg/fluence/group" - "sigs.k8s.io/scheduler-plugins/pkg/fluence/utils" ) +// Fluence schedules pods in a group using Fluxion as a backend +// We inherit cosched.Coscheduling to use some of the primary functions type Fluence struct { mutex sync.Mutex - handle framework.Handle client client.Client // Store jobid on the level of a group (which can be a single pod) groupToJobId map[string]uint64 - pgMgr coschedulingcore.Manager -} -// Name is the name of the plugin used in the Registry and configurations. -// Note that this would do better as an annotation (fluence.flux-framework.org/pod-group) -// But we cannot use them as selectors then! -const ( - Name = "Fluence" -) + frameworkHandler framework.Handle + pgMgr fcore.Manager + scheduleTimeout *time.Duration + pgBackoff *time.Duration +} var ( - _ framework.QueueSortPlugin = &Fluence{} - _ framework.PreFilterPlugin = &Fluence{} - _ framework.FilterPlugin = &Fluence{} + _ framework.QueueSortPlugin = &Fluence{} + _ framework.PreFilterPlugin = &Fluence{} + _ framework.PostFilterPlugin = &Fluence{} // Here down are from coscheduling + _ framework.PermitPlugin = &Fluence{} + _ framework.ReservePlugin = &Fluence{} + _ framework.EnqueueExtensions = &Fluence{} ) -func (f *Fluence) Name() string { - return Name -} +const ( + // Name is the name of the plugin used in Registry and configurations. 
+ Name = "Fluence" +) // Initialize and return a new Fluence Custom Scheduler Plugin -// This class and functions are analogous to: -// https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/pkg/coscheduling/coscheduling.go#L63 -func New(_ runtime.Object, handle framework.Handle) (framework.Plugin, error) { +func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) { - f := &Fluence{handle: handle, groupToJobId: make(map[string]uint64)} - - ctx := context.TODO() - fcore.Init() - - fluxPodsInformer := handle.SharedInformerFactory().Core().V1().Pods().Informer() - fluxPodsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - UpdateFunc: f.updatePod, - DeleteFunc: f.deletePod, - }) - - go fluxPodsInformer.Run(ctx.Done()) + // Keep these empty for now, use defaults + args := config.CoschedulingArgs{} scheme := runtime.NewScheme() - clientscheme.AddToScheme(scheme) - v1.AddToScheme(scheme) - sched.AddToScheme(scheme) - k8scli, err := client.New(handle.KubeConfig(), client.Options{Scheme: scheme}) - if err != nil { - return nil, err - } - - // Save the kubernetes client for fluence to interact with cluster objects - f.client = k8scli + _ = clientscheme.AddToScheme(scheme) + _ = v1.AddToScheme(scheme) + _ = v1alpha1.AddToScheme(scheme) - fieldSelector, err := fields.ParseSelector(",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)) + client, err := client.New(handle.KubeConfig(), client.Options{Scheme: scheme}) if err != nil { - klog.Errorf("ParseSelector failed %s", err) - os.Exit(1) + return nil, err } - informerFactory := informers.NewSharedInformerFactoryWithOptions(handle.ClientSet(), 0, informers.WithTweakListOptions(func(opt *metav1.ListOptions) { - opt.FieldSelector = fieldSelector.String() - })) - podInformer := informerFactory.Core().V1().Pods() - scheduleTimeDuration := time.Duration(500) * time.Second + // Performance improvement when retrieving list of objects by namespace or we'll log 'index not exist' warning. + handle.SharedInformerFactory().Core().V1().Pods().Informer().AddIndexers(cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) - // https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/pkg/coscheduling/core/core.go#L84 - pgMgr := coschedulingcore.NewPodGroupManager( - k8scli, + // PermitWaitingTimeSeconds is the waiting timeout in seconds. + scheduleTimeDuration := time.Duration(args.PermitWaitingTimeSeconds) * time.Second + pgMgr := fcore.NewPodGroupManager( + client, handle.SnapshotSharedLister(), &scheduleTimeDuration, - podInformer, + // Keep the podInformer (from frameworkHandle) as the single source of Pods. + handle.SharedInformerFactory().Core().V1().Pods(), ) - f.pgMgr = pgMgr - // stopCh := make(chan struct{}) - // defer close(stopCh) - // informerFactory.Start(stopCh) - informerFactory.Start(ctx.Done()) + // The main difference here is adding the groupToJobId lookup + plugin := &Fluence{ + frameworkHandler: handle, + pgMgr: pgMgr, + scheduleTimeout: &scheduleTimeDuration, + groupToJobId: make(map[string]uint64), + } - if !cache.WaitForCacheSync(ctx.Done(), podInformer.Informer().HasSynced) { - err := fmt.Errorf("WaitForCacheSync failed") - klog.ErrorS(err, "Cannot sync caches") + // PodGroupBackoffSeconds: backoff time in seconds before a pod group can be scheduled again. 
+ if args.PodGroupBackoffSeconds < 0 { + err := fmt.Errorf("parse arguments failed") + klog.ErrorS(err, "PodGroupBackoffSeconds cannot be negative") return nil, err + } else if args.PodGroupBackoffSeconds > 0 { + pgBackoff := time.Duration(args.PodGroupBackoffSeconds) * time.Second + plugin.pgBackoff = &pgBackoff } + return plugin, nil +} + +func (f *Fluence) Name() string { + return Name +} - klog.Info("Fluence scheduler plugin started") - return f, nil +// Fluence has added delete, although I wonder if update includes that signal +// and it's redundant? +func (f *Fluence) EventsToRegister() []framework.ClusterEventWithHint { + // To register a custom event, follow the naming convention at: + // https://git.k8s.io/kubernetes/pkg/scheduler/eventhandlers.go#L403-L410 + pgGVK := fmt.Sprintf("podgroups.v1alpha1.%v", scheduling.GroupName) + return []framework.ClusterEventWithHint{ + {Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Add | framework.Delete}}, + {Event: framework.ClusterEvent{Resource: framework.GVK(pgGVK), ActionType: framework.Add | framework.Update | framework.Delete}}, + } } // Less is used to sort pods in the scheduling queue in the following order. @@ -147,177 +163,131 @@ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { // If they are the same, fall back to sorting by name. if creationTime1.Equal(&creationTime2) { - return coschedulingcore.GetNamespacedName(podInfo1.Pod) < coschedulingcore.GetNamespacedName(podInfo2.Pod) + return fcore.GetNamespacedName(podInfo1.Pod) < fcore.GetNamespacedName(podInfo2.Pod) } return creationTime1.Before(&creationTime2) -} - -// PreFilter checks info about the Pod / checks conditions that the cluster or the Pod must meet. -// This comes after sort -func (f *Fluence) PreFilter( - ctx context.Context, - state *framework.CycleState, - pod *v1.Pod, -) (*framework.PreFilterResult, *framework.Status) { - klog.Infof("[Fluence] Examining pod %s", pod.Name) - - // groupName will be named according to the single pod namespace / pod if there wasn't - // a user defined group. This is a size 1 group we handle equivalently. - groupName, pg := f.pgMgr.GetPodGroup(ctx, pod) +} - // If we don't have a pod group and it's here, it was asked to be scheduled by fluence - // but the group isn't ready. Unshedulable for now. - if pg == nil { - klog.Infof("[Fluence] Group %s/%s does not have a pod group, not schedulable yet.", pod.Namespace, pod.Name) - return nil, framework.NewStatus(framework.Unschedulable, "Missing podgroup") - } - klog.Infof("[Fluence] Pod %s is in group %s with minimum members %d", pod.Name, groupName, pg.Spec.MinMember) - - // Has this podgroup been seen by fluence yet? If yes, we will have it in the cache - cache := fcore.GetFluenceCache(groupName) - klog.Infof("[Fluence] cache %s", cache) - - // Fluence has never seen this before, we need to schedule an allocation - // It also could have been seen, but was not able to get one. - if cache == nil { - klog.Infof("[Fluence] Does not have nodes for %s yet, asking Fluxion", groupName) - - // groupName is the namespaced name / - err := f.AskFlux(ctx, pod, pg, groupName) - if err != nil { - klog.Infof("[Fluence] Fluxion returned an error %s, not schedulable", err.Error()) - return nil, framework.NewStatus(framework.Unschedulable, err.Error()) - } +// PreFilter performs the following validations. +// 1. Whether the PodGroup that the Pod belongs to is on the deny list. +// 2. 
Whether the total number of pods in a PodGroup is less than its `minMember`. +func (f *Fluence) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) { + // If PreFilter fails, return framework.UnschedulableAndUnresolvable to avoid + // any preemption attempts. + if err := f.pgMgr.PreFilter(ctx, pod); err != nil { + klog.ErrorS(err, "PreFilter failed", "pod", klog.KObj(pod)) + return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error()) } - - // This is the next node in the list - nodename, err := fcore.GetNextNode(groupName) - if err != nil { - return nil, framework.NewStatus(framework.Unschedulable, err.Error()) - } - klog.Infof("Node Selected %s (pod %s:group %s)", nodename, pod.Name, groupName) - - // Create a fluxState (CycleState) with things that might be useful - // This isn't a PodGroupCache, but a single node cache, which also - // has group information, but just is for one node. Note that assigned - // tasks is hard coded to 1 but this isn't necessarily the case - we should - // eventually be able to GetNextNode for a number of tasks, for example - // (unless task == pod in which case it is always 1) - nodeCache := fcore.NodeCache{NodeName: nodename, GroupName: groupName, AssignedTasks: 1} - state.Write(framework.StateKey(pod.Name), &fcore.FluxStateData{NodeCache: nodeCache}) return nil, framework.NewStatus(framework.Success, "") } -// TODO we need to account for affinity here -func (f *Fluence) Filter( - ctx context.Context, - cycleState *framework.CycleState, - pod *v1.Pod, - nodeInfo *framework.NodeInfo, -) *framework.Status { +// PostFilter is used to reject a group of pods if a pod does not pass PreFilter or Filter. +func (f *Fluence) PostFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, + filteredNodeStatusMap framework.NodeToStatusMap) (*framework.PostFilterResult, *framework.Status) { + pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) + if pg == nil { + klog.V(4).InfoS("Pod does not belong to any group", "pod", klog.KObj(pod)) + return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, "can not find pod group") + } - klog.Info("Filtering input node ", nodeInfo.Node().Name) - state, err := cycleState.Read(framework.StateKey(pod.Name)) + // This indicates there are already enough Pods satisfying the PodGroup, + // so don't bother to reject the whole PodGroup. + assigned := f.pgMgr.CalculateAssignedPods(pg.Name, pod.Namespace) + if assigned >= int(pg.Spec.MinMember) { + klog.V(4).InfoS("Assigned pods", "podGroup", klog.KObj(pg), "assigned", assigned) + return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable) + } - // No error means we retrieved the state - if err == nil { + // If the gap is less than/equal 10%, we may want to try subsequent Pods + // to see they can satisfy the PodGroup + notAssignedPercentage := float32(int(pg.Spec.MinMember)-assigned) / float32(pg.Spec.MinMember) + if notAssignedPercentage <= 0.1 { + klog.V(4).InfoS("A small gap of pods to reach the quorum", "podGroup", klog.KObj(pg), "percentage", notAssignedPercentage) + return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable) + } - // Try to convert the state to FluxStateDate - value, ok := state.(*fcore.FluxStateData) + // It's based on an implicit assumption: if the nth Pod failed, + // it's inferrable other Pods belonging to the same PodGroup would be very likely to fail. 
+ f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { + if waitingPod.GetPod().Namespace == pod.Namespace && label.GetPodGroupLabel(waitingPod.GetPod()) == pg.Name { + klog.V(3).InfoS("PostFilter rejects the pod", "podGroup", klog.KObj(pg), "pod", klog.KObj(waitingPod.GetPod())) + waitingPod.Reject(f.Name(), "optimistic rejection in PostFilter") + } + }) - // If we have state data that isn't equal to the current assignment, no go - if ok && value.NodeCache.NodeName != nodeInfo.Node().Name { - return framework.NewStatus(framework.Unschedulable, "pod is not permitted") - } else { - klog.Infof("Filter: node %s selected for %s\n", value.NodeCache.NodeName, pod.Name) + if f.pgBackoff != nil { + pods, err := f.frameworkHandler.SharedInformerFactory().Core().V1().Pods().Lister().Pods(pod.Namespace).List( + labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: label.GetPodGroupLabel(pod)}), + ) + if err == nil && len(pods) >= int(pg.Spec.MinMember) { + f.pgMgr.BackoffPodGroup(pgName, *f.pgBackoff) } } - return framework.NewStatus(framework.Success) + + f.pgMgr.DeletePermittedPodGroup(pgName) + return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, + fmt.Sprintf("PodGroup %v gets rejected due to Pod %v is unschedulable even after PostFilter", pgName, pod.Name)) } -// PreFilterExtensions allow for callbacks on filtered states -// https://github.com/kubernetes/kubernetes/blob/master/pkg/scheduler/framework/interface.go#L383 +// PreFilterExtensions returns a PreFilterExtensions interface if the plugin implements one. func (f *Fluence) PreFilterExtensions() framework.PreFilterExtensions { return nil } -// AskFlux will ask flux for an allocation for nodes for the pod group. -func (f *Fluence) AskFlux( - ctx context.Context, - pod *v1.Pod, - pg *sched.PodGroup, - groupName string, -) error { - - // clean up previous match if a pod has already allocated previously - f.mutex.Lock() - _, isAllocated := f.groupToJobId[groupName] - f.mutex.Unlock() - - // This case happens when there is some reason that an initial job pods partially allocated, - // but then the job restarted, and new pods are present but fluence had assigned nodes to - // the old ones (and there aren't enough). The job would have had to complete in some way, - // and the PodGroup would have to then recreate, and have the same job id (the group name). - // This happened when I cancalled a bunch of jobs and they didn't have the chance to - // cancel in fluence. What we can do here is assume the previous pods are no longer running - // and cancel the flux job to create again. - if isAllocated { - klog.Info("Warning - group %s was previously allocated and is requesting again, so must have completed.", groupName) - f.mutex.Lock() - f.cancelFluxJob(groupName) - f.mutex.Unlock() - } - - // IMPORTANT: this is a JobSpec for *one* pod, assuming they are all the same. - // This obviously may not be true if we have a hetereogenous PodGroup. - // We name it based on the group, since it will represent the group - jobspec := utils.PreparePodJobSpec(pod, groupName) - klog.Infof("[Fluence] Inspect pod info, jobspec: %s\n", jobspec) - conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) - - // TODO change this to just return fmt.Errorf - if err != nil { - klog.Errorf("[Fluence] Error connecting to server: %v\n", err) - return err +// Permit is the functions invoked by the framework at "Permit" extension point. 
+func (f *Fluence) Permit(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (*framework.Status, time.Duration) { + waitTime := *f.scheduleTimeout + s := f.pgMgr.Permit(ctx, pod) + var retStatus *framework.Status + switch s { + case fcore.PodGroupNotSpecified: + return framework.NewStatus(framework.Success, ""), 0 + case fcore.PodGroupNotFound: + return framework.NewStatus(framework.Unschedulable, "PodGroup not found"), 0 + case fcore.Wait: + klog.InfoS("Pod is waiting to be scheduled to node", "pod", klog.KObj(pod), "nodeName", nodeName) + _, pg := f.pgMgr.GetPodGroup(ctx, pod) + + // Note this is in seconds, defaults to 60 seconds + if wait := util.GetWaitTimeDuration(pg, f.scheduleTimeout); wait != 0 { + waitTime = wait + } + retStatus = framework.NewStatus(framework.Wait) + // We will also request to move the sibling pods back to activeQ. + f.pgMgr.ActivateSiblings(pod, state) + case fcore.Success: + pgFullName := label.GetPodGroupFullName(pod) + f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { + if label.GetPodGroupFullName(waitingPod.GetPod()) == pgFullName { + klog.V(3).InfoS("Permit allows", "pod", klog.KObj(waitingPod.GetPod())) + waitingPod.Allow(f.Name()) + } + }) + klog.V(3).InfoS("Permit allows", "pod", klog.KObj(pod)) + retStatus = framework.NewStatus(framework.Success) + waitTime = 0 } - defer conn.Close() - grpcclient := pb.NewFluxcliServiceClient(conn) - _, cancel := context.WithTimeout(context.Background(), 200*time.Second) - defer cancel() + return retStatus, waitTime +} - request := &pb.MatchRequest{ - Ps: jobspec, - Request: "allocate", - Count: pg.Spec.MinMember, - } +// Reserve is the functions invoked by the framework at "reserve" extension point. +func (f *Fluence) Reserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { + return nil +} - // An error here is an error with making the request - r, err := grpcclient.Match(context.Background(), request) - if err != nil { - klog.Errorf("[Fluence] did not receive any match response: %v\n", err) - return err +// Unreserve rejects all other Pods in the PodGroup when one of the pods in the group times out. +func (f *Fluence) Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { + pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) + if pg == nil { + return } - - // TODO GetPodID should be renamed, because it will reflect the group - klog.Infof("[Fluence] Match response ID %s\n", r.GetPodID()) - - // Get the nodelist and inspect - nodes := r.GetNodelist() - klog.Infof("[Fluence] Nodelist returned from Fluxion: %s\n", nodes) - - // Assign the nodelist - this sets the group name in the groupSeen cache - // at this point, we can retrieve the cache and get nodes - nodelist := fcore.CreateNodeList(nodes, groupName) - - jobid := uint64(r.GetJobID()) - klog.Infof("[Fluence] parsed node pods list %s for job id %d\n", nodelist, jobid) - - // TODO would be nice to actually be able to ask flux jobs -a to fluence - // That way we can verify assignments, etc. 
- f.mutex.Lock() - f.groupToJobId[groupName] = jobid - f.mutex.Unlock() - return nil + f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { + if waitingPod.GetPod().Namespace == pod.Namespace && label.GetPodGroupLabel(waitingPod.GetPod()) == pg.Name { + klog.V(3).InfoS("Unreserve rejects", "pod", klog.KObj(waitingPod.GetPod()), "podGroup", klog.KObj(pg)) + waitingPod.Reject(f.Name(), "rejection in Unreserve") + } + }) + f.pgMgr.DeletePermittedPodGroup(pgName) } diff --git a/sig-scheduler-plugins/pkg/fluence/labels/labels.go b/sig-scheduler-plugins/pkg/fluence/labels/labels.go index e377d97..f955d67 100644 --- a/sig-scheduler-plugins/pkg/fluence/labels/labels.go +++ b/sig-scheduler-plugins/pkg/fluence/labels/labels.go @@ -1,14 +1,33 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package labels import ( + "fmt" "time" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // Labels to be shared between different components const ( + // We use the same label to be consistent // https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/apis/scheduling/v1alpha1/types.go#L109 PodGroupLabel = "scheduling.x-k8s.io/pod-group" @@ -16,10 +35,29 @@ const ( //PodGroupNameLabel = "fluence.pod-group" PodGroupSizeLabel = "fluence.group-size" - // Internal use + // Internal use (not used yet) PodGroupTimeCreated = "flunce.created-at" ) +// GetPodGroupLabel get pod group name from pod labels +func GetPodGroupLabel(pod *v1.Pod) string { + return pod.Labels[PodGroupLabel] +} + +// GetPodGroupFullName get namespaced group name from pod labels +func GetPodGroupFullName(pod *v1.Pod) string { + pgName := GetPodGroupLabel(pod) + if len(pgName) == 0 { + return "" + } + return fmt.Sprintf("%v/%v", pod.Namespace, pgName) +} + +// GetPodGroupSize gets the pod group size from the label +func GetPodGroupSize(pod *v1.Pod) string { + return pod.Labels[PodGroupSizeLabel] +} + // getTimeCreated returns the timestamp when we saw the object func GetTimeCreated() string { From 726149c421d981a55f63be224997d37fcbacea8d Mon Sep 17 00:00:00 2001 From: vsoch Date: Thu, 14 Mar 2024 02:14:45 -0600 Subject: [PATCH 22/28] update: adding back in fluence logic Problem: fluence is missing! Solution: add back fluence. This is a different design in that we do the asking from the perspective of the pod group, meaning that we get back a full set of nodes, and save them (assigned exactly) to specific pods. This could also be more lenient - e.g., creating a cache of the list and then taking off the cache, but I like the finer granularity of 1:1 mapping for future issues that might arise (where one pod needs a new node). This design also introduces a nice feature that we can ask for the resources (meaning creating a jobspec) for exactly what we need across pods for the group because we are listing all pods for the group before we generate the jobspec. 
I left it as it currently was before (using one representative pod) to not incur too many changes but this definitely can be tried. There is likely more work to be done to test edge cases and account for resources when fluence starts (and be able to load a state if it restarts) but this is pretty great for a first shot! The local lammps experiment ran without clogging and I am testing on GKE as a next step. Finally, I think there is a lot of poetential error in allowing a ton of other PreFilter plugins to exist, each of which could return their own set of nodes to consider that might mismatch what fluence has decided on. For this reason I have done aggressive pruning and we can add things back as we see fit. Signed-off-by: vsoch --- Makefile | 2 + README.md | 15 +- sig-scheduler-plugins/cmd/scheduler/main.go | 13 +- .../templates/configmap.yaml | 64 +++++ .../charts/as-a-second-scheduler/values.yaml | 17 +- .../pkg/fluence/core/core.go | 210 ++++++-------- .../pkg/fluence/core/flux.go | 259 ++++++++++++++++++ sig-scheduler-plugins/pkg/fluence/fluence.go | 222 +++++---------- 8 files changed, 502 insertions(+), 300 deletions(-) create mode 100644 sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/configmap.yaml create mode 100644 sig-scheduler-plugins/pkg/fluence/core/flux.go diff --git a/Makefile b/Makefile index 6ab44fe..91789d8 100644 --- a/Makefile +++ b/Makefile @@ -26,12 +26,14 @@ update: clone prepare: clone # These are entirely new directory structures rm -rf $(CLONE_UPSTREAM)/pkg/fluence + # rm -rf $(CLONE_UPSTREAM)/cmd/app rm -rf $(CLONE_UPSTREAM)/pkg/controllers/podgroup_controller.go rm -rf $(CLONE_UPSTREAM)/cmd/controller/app/server.go cp -R sig-scheduler-plugins/pkg/fluence $(CLONE_UPSTREAM)/pkg/fluence cp -R sig-scheduler-plugins/pkg/controllers/* $(CLONE_UPSTREAM)/pkg/controllers/ # This is the one exception not from sig-scheduler-plugins because it is needed in both spots cp -R src/fluence/fluxcli-grpc $(CLONE_UPSTREAM)/pkg/fluence/fluxcli-grpc + # cp -R sig-scheduler-plugins/cmd/app ./upstream/cmd/app # These are files with subtle changes to add fluence cp sig-scheduler-plugins/cmd/scheduler/main.go ./upstream/cmd/scheduler/main.go cp sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/*.yaml $(CLONE_UPSTREAM)/manifests/install/charts/as-a-second-scheduler/templates/ diff --git a/README.md b/README.md index 89f2a18..8c51de7 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,11 @@ Fluence enables HPC-grade pod scheduling in Kubernetes via the [Kubernetes Sched ## TODO -- Need to list pods, get state, and if is completed, cancel the job id. -- Keep track of state of all pods in group, when all of pods are completed, then issue cancel. -- Calculate on the fly - on the update event we want to loop through pods, if ALL completed, then delete the podid for fluence. 
+- On init, need to load in resource graph that accounts for running stuff +- Need to allow for restart / crashes and looking up existing jobid, updating maps in PodGroup +- Since AskFlux is done on level of pod group, refactor function to account for specific resources of all pods (not just one pod) +- Figure out if EventsToRegister replaces old informer +- Would be nice to see the state of fluxion (retest the kubectl-fluence plugin) ## Getting started @@ -526,13 +528,6 @@ For this setup if you are developing locally with kind, you will need to enable kind create cluster --config ./kind-config.yaml ``` -#### TODO - - - Try what [kueue does](https://github.com/kubernetes-sigs/kueue/blob/6d57813a52066dab412735deeeb60ebb0cdb8e8e/cmd/kueue/main.go#L146-L155) to not require cert-manager. - - Try other strategies for setting owner references (so cleans up when owner deleted) - - When that is done, add tests for deletion of pod group (the current method is not perfect and needs improvement) -- We really need to see the state of fluxion - I had this running for about 6 hours in kind, and at some point it just stopped working. I deleted and re-created the cluster and it was restored. It could be a development hiccup but would be good to know! - #### Components - [FluxStateData](sig-scheduler-plugins/pkg/fluence/core/core.go): is given to the [framework.CycleState](https://github.com/kubernetes/kubernetes/blob/242b41b36a20032f99e8a059ca0a5d764105217b/pkg/scheduler/framework/cycle_state.go#L48) and serves as a vehicle to store a cache of node name assignment. diff --git a/sig-scheduler-plugins/cmd/scheduler/main.go b/sig-scheduler-plugins/cmd/scheduler/main.go index 2b21d28..4d98d52 100644 --- a/sig-scheduler-plugins/cmd/scheduler/main.go +++ b/sig-scheduler-plugins/cmd/scheduler/main.go @@ -22,6 +22,10 @@ import ( "k8s.io/component-base/cli" _ "k8s.io/component-base/metrics/prometheus/clientgo" // for rest client metric registration _ "k8s.io/component-base/metrics/prometheus/version" // for version metric registration + + // Uncomment here for a local one here we use to debug + // This was a clone from kubernetes/kubernetes -> cmd/app + //"sigs.k8s.io/scheduler-plugins/cmd/app" "k8s.io/kubernetes/cmd/kube-scheduler/app" "sigs.k8s.io/scheduler-plugins/pkg/capacityscheduling" @@ -29,15 +33,14 @@ import ( "sigs.k8s.io/scheduler-plugins/pkg/fluence" "sigs.k8s.io/scheduler-plugins/pkg/networkaware/networkoverhead" "sigs.k8s.io/scheduler-plugins/pkg/networkaware/topologicalsort" - "sigs.k8s.io/scheduler-plugins/pkg/noderesources" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology" - "sigs.k8s.io/scheduler-plugins/pkg/podstate" "sigs.k8s.io/scheduler-plugins/pkg/preemptiontoleration" - "sigs.k8s.io/scheduler-plugins/pkg/qos" "sigs.k8s.io/scheduler-plugins/pkg/trimaran/loadvariationriskbalancing" - "sigs.k8s.io/scheduler-plugins/pkg/trimaran/lowriskovercommitment" "sigs.k8s.io/scheduler-plugins/pkg/trimaran/targetloadpacking" + "sigs.k8s.io/scheduler-plugins/pkg/podstate" + "sigs.k8s.io/scheduler-plugins/pkg/qos" + // Ensure scheme package is initialized. 
_ "sigs.k8s.io/scheduler-plugins/apis/config/scheme" ) @@ -52,11 +55,9 @@ func main() { app.WithPlugin(loadvariationriskbalancing.Name, loadvariationriskbalancing.New), app.WithPlugin(networkoverhead.Name, networkoverhead.New), app.WithPlugin(topologicalsort.Name, topologicalsort.New), - app.WithPlugin(noderesources.AllocatableName, noderesources.NewAllocatable), app.WithPlugin(noderesourcetopology.Name, noderesourcetopology.New), app.WithPlugin(preemptiontoleration.Name, preemptiontoleration.New), app.WithPlugin(targetloadpacking.Name, targetloadpacking.New), - app.WithPlugin(lowriskovercommitment.Name, lowriskovercommitment.New), app.WithPlugin(podstate.Name, podstate.New), app.WithPlugin(qos.Name, qos.New), app.WithPlugin(fluence.Name, fluence.New), diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/configmap.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/configmap.yaml new file mode 100644 index 0000000..9f3d8bf --- /dev/null +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/configmap.yaml @@ -0,0 +1,64 @@ +{{- if .Values.plugins.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: scheduler-config + namespace: {{ .Release.Namespace }} +data: + scheduler-config.yaml: | + apiVersion: kubescheduler.config.k8s.io/v1 + kind: KubeSchedulerConfiguration + leaderElection: + leaderElect: {{ .Values.scheduler.leaderElect }} + profiles: + # Compose all plugins in one profile + - schedulerName: {{ .Values.scheduler.name }} + plugins: + preBind: + disabled: + - name: {{ .Values.scheduler.name }} + filter: + disabled: + {{- range $.Values.plugins.disabledAll }} + - name: {{ title . }} + {{- end }} + reserve: + disabled: + {{- range $.Values.plugins.disabledAll }} + - name: {{ title . }} + {{- end }} + score: + disabled: + {{- range $.Values.plugins.disabledAll }} + - name: {{ title . }} + {{- end }} + preScore: + disabled: + {{- range $.Values.plugins.disabledAll }} + - name: {{ title . }} + {{- end }} + postFilter: + disabled: + {{- range $.Values.plugins.disabledAll }} + - name: {{ title . }} + {{- end }} + preFilter: + disabled: + {{- range $.Values.plugins.disabledAll }} + - name: {{ title . }} + {{- end }} + multiPoint: + enabled: + {{- range $.Values.plugins.enabled }} + - name: {{ title . }} + {{- end }} + disabled: + {{- range $.Values.plugins.disabled }} + - name: {{ title . }} + {{- end }} + {{- if $.Values.pluginConfig }} + pluginConfig: {{ toYaml $.Values.pluginConfig | nindent 6 }} + {{- end }} + + {{- /* TODO: wire CRD installation with enabled plugins. 
*/}} +{{- end }} diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml index e48aa98..4113209 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/values.yaml @@ -31,14 +31,23 @@ controller: plugins: enabled: ["Fluence"] disabled: ["CapacityScheduling","NodeResourceTopologyMatch","NodeResourcesAllocatable","PrioritySort","Coscheduling"] # only in-tree plugins need to be defined here + # Disable EVERYTHING except for fluence + # VolumeBinding is required for PreBind, NodeResourcesFit is required or you'll get mismatches + # Yes - some of these are irrelevant for the use case here, but I'd rather be super + # conservative and be absolutely sure only fluence is running PreFilter to select nodes + disabledAll: ["NodePorts", "VolumeRestrictions", "EBSLimits", + "GCEPDLimits", "NodeVolumeLimits", "AzureDiskLimits", "VolumeZone", + "PodTopologySpread", "InterPodAffinity", "NodeAffinity", + "NodeUnschedulable", "NodeName", "TaintToleration", "DefaultPreemtion", + "NodeResourcesBalancedAllocation", "ImageLocality"] # Customize the enabled plugins' config. # Refer to the "pluginConfig" section of manifests//scheduler-config.yaml. # For example, for Coscheduling plugin, you want to customize the permit waiting timeout to 10 seconds: -pluginConfig: -- name: Coscheduling - args: - permitWaitingTimeSeconds: 10 # default is 60 +# pluginConfig: +# - name: Coscheduling +# args: +# permitWaitingTimeSeconds: 10 # default is 60 # Or, customize the other plugins # - name: NodeResourceTopologyMatch # args: diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index efa1127..eed9536 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -24,13 +24,13 @@ import ( gochache "github.com/patrickmn/go-cache" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" informerv1 "k8s.io/client-go/informers/core/v1" listerv1 "k8s.io/client-go/listers/core/v1" - "k8s.io/klog/v2" + klog "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" @@ -38,28 +38,25 @@ import ( "sigs.k8s.io/scheduler-plugins/pkg/util" ) -type Status string +// TODO should eventually store group name here to reassociate on reload +type FluxStateData struct { + NodeName string +} -const ( - // PodGroupNotSpecified denotes no PodGroup is specified in the Pod spec. - PodGroupNotSpecified Status = "PodGroup not specified" - // PodGroupNotFound denotes the specified PodGroup in the Pod spec is - // not found in API server. - PodGroupNotFound Status = "PodGroup not found" - Success Status = "Success" - Wait Status = "Wait" -) +func (s *FluxStateData) Clone() framework.StateData { + clone := &FluxStateData{ + NodeName: s.NodeName, + } + return clone +} // Manager defines the interfaces for PodGroup management. 
type Manager interface { - PreFilter(context.Context, *corev1.Pod) error - Permit(context.Context, *corev1.Pod) Status + PreFilter(context.Context, *corev1.Pod, *framework.CycleState) error + GetPodNode(*corev1.Pod) string GetPodGroup(context.Context, *corev1.Pod) (string, *v1alpha1.PodGroup) GetCreationTimestamp(*corev1.Pod, time.Time) time.Time DeletePermittedPodGroup(string) - CalculateAssignedPods(string, string) int - ActivateSiblings(pod *corev1.Pod, state *framework.CycleState) - BackoffPodGroup(string, time.Duration) } // PodGroupManager defines the scheduling operation called @@ -77,7 +74,16 @@ type PodGroupManager struct { backedOffPG *gochache.Cache // podLister is pod lister podLister listerv1.PodLister + + // This isn't great to save state, but we can improve upon it + // we should have a way to load jobids into this if fluence is recreated + // If we can annotate them in fluxion and query for that, we can! + groupToJobId map[string]uint64 + podToNode map[string]string + + // Probably should just choose one... oh well sync.RWMutex + mutex sync.Mutex } // NewPodGroupManager creates a new operation object. @@ -89,59 +95,37 @@ func NewPodGroupManager(client client.Client, snapshotSharedLister framework.Sha podLister: podInformer.Lister(), permittedPG: gochache.New(3*time.Second, 3*time.Second), backedOffPG: gochache.New(10*time.Second, 10*time.Second), + groupToJobId: map[string]uint64{}, + podToNode: map[string]string{}, } return pgMgr } -func (pgMgr *PodGroupManager) BackoffPodGroup(pgName string, backoff time.Duration) { - if backoff == time.Duration(0) { - return +// GetStatuses string (of all pods) to show for debugging purposes +func (pgMgr *PodGroupManager) GetStatuses(pods []*corev1.Pod) string { + statuses := "" + for _, pod := range pods { + statuses += " " + fmt.Sprintf("%s", pod.Status.Phase) } - pgMgr.backedOffPG.Add(pgName, nil, backoff) + return statuses } -// ActivateSiblings stashes the pods belonging to the same PodGroup of the given pod -// in the given state, with a reserved key "kubernetes.io/pods-to-activate". -func (pgMgr *PodGroupManager) ActivateSiblings(pod *corev1.Pod, state *framework.CycleState) { - pgName := util.GetPodGroupLabel(pod) - if pgName == "" { - return - } - - pods, err := pgMgr.podLister.Pods(pod.Namespace).List( - labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: pgName}), - ) - if err != nil { - klog.ErrorS(err, "Failed to obtain pods belong to a PodGroup", "podGroup", pgName) - return - } - - for i := range pods { - if pods[i].UID == pod.UID { - pods = append(pods[:i], pods[i+1:]...) - break - } - } - - if len(pods) != 0 { - if c, err := state.Read(framework.PodsToActivateKey); err == nil { - if s, ok := c.(*framework.PodsToActivate); ok { - s.Lock() - for _, pod := range pods { - namespacedName := GetNamespacedName(pod) - s.Map[namespacedName] = pod - } - s.Unlock() - } - } - } +// GetPodNode is a quick lookup to see if we have a node +func (pgMgr *PodGroupManager) GetPodNode(pod *corev1.Pod) string { + node, _ := pgMgr.podToNode[pod.Name] + return node } // PreFilter filters out a pod if // 1. it belongs to a podgroup that was recently denied or // 2. the total number of pods in the podgroup is less than the minimum number of pods // that is required to be scheduled. 
-func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, pod *corev1.Pod) error { +func (pgMgr *PodGroupManager) PreFilter( + ctx context.Context, + pod *corev1.Pod, + state *framework.CycleState, +) error { + klog.V(5).InfoS("Pre-filter", "pod", klog.KObj(pod)) pgFullName, pg := pgMgr.GetPodGroup(ctx, pod) if pg == nil { @@ -159,15 +143,24 @@ func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, pod *corev1.Pod) er return fmt.Errorf("podLister list pods failed: %w", err) } + // Get statuses to show for debugging + statuses := pgMgr.GetStatuses(pods) + + // This shows us the number of pods we have in the set and their states + klog.Infof("Fluence Pre-filter", "group", pgFullName, "pods", statuses, "MinMember", pg.Spec.MinMember, "Size", len(pods)) if len(pods) < int(pg.Spec.MinMember) { return fmt.Errorf("pre-filter pod %v cannot find enough sibling pods, "+ "current pods number: %v, minMember of group: %v", pod.Name, len(pods), pg.Spec.MinMember) } - if pg.Spec.MinResources == nil { - return nil - } + // TODO we likely can take advantage of these resources or other custom + // attributes we add. For now ignore and calculate based on pod needs (above) + // if pg.Spec.MinResources == nil { + // fmt.Printf("Fluence Min resources are null, skipping PreFilter") + // return nil + // } + // This is from coscheduling. // TODO(cwdsuzhou): This resource check may not always pre-catch unschedulable pod group. // It only tries to PreFilter resource constraints so even if a PodGroup passed here, // it may not necessarily pass Filter due to other constraints such as affinity/taints. @@ -175,43 +168,39 @@ func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, pod *corev1.Pod) er return nil } - nodes, err := pgMgr.snapshotSharedLister.NodeInfos().List() + // TODO: right now we ask Fluxion for a podspec based on ONE pod, but + // we have the whole group! We can handle different pod needs now :) + repPod := pods[0] + nodes, err := pgMgr.AskFlux(ctx, *repPod, pg, pgFullName) if err != nil { + klog.Infof("[Fluence] Fluxion returned an error %s, not schedulable", err.Error()) return err } + klog.Infof("Node Selected %s (pod group %s)", nodes, pgFullName) + + // Some reason fluxion gave us the wrong size? + if len(nodes) != len(pods) { + klog.Info("Warning - group %s needs %d nodes but Fluxion returned the wrong number nodes %d.", pgFullName, len(pods), len(nodes)) + pgMgr.mutex.Lock() + pgMgr.cancelFluxJob(pgFullName, repPod) + pgMgr.mutex.Unlock() + } - minResources := pg.Spec.MinResources.DeepCopy() - podQuantity := resource.NewQuantity(int64(pg.Spec.MinMember), resource.DecimalSI) - minResources[corev1.ResourcePods] = *podQuantity - err = CheckClusterResource(nodes, minResources, pgFullName) - if err != nil { - klog.ErrorS(err, "Failed to PreFilter", "podGroup", klog.KObj(pg)) - return err + // Create a fluxState (CycleState) with all nodes - this is used to retrieve + // the specific node assigned to the pod in Filter, which returns a node + // Note that this probably is not useful beyond the pod we are in the context + // of, but why not do it. + for i, node := range nodes { + pod := pods[i] + stateData := FluxStateData{NodeName: node} + state.Write(framework.StateKey(pod.Name), &stateData) + // Also save to the podToNode lookup + pgMgr.podToNode[pod.Name] = node } pgMgr.permittedPG.Add(pgFullName, pgFullName, *pgMgr.scheduleTimeout) return nil } -// Permit permits a pod to run, if the minMember match, it would send a signal to chan. 
-func (pgMgr *PodGroupManager) Permit(ctx context.Context, pod *corev1.Pod) Status { - pgFullName, pg := pgMgr.GetPodGroup(ctx, pod) - if pgFullName == "" { - return PodGroupNotSpecified - } - if pg == nil { - // A Pod with a podGroup name but without a PodGroup found is denied. - return PodGroupNotFound - } - - assigned := pgMgr.CalculateAssignedPods(pg.Name, pg.Namespace) - // The number of pods that have been assigned nodes is calculated from the snapshot. - // The current pod in not included in the snapshot during the current scheduling cycle. - if int32(assigned)+1 >= pg.Spec.MinMember { - return Success - } - return Wait -} - // GetCreationTimestamp returns the creation time of a podGroup or a pod. func (pgMgr *PodGroupManager) GetCreationTimestamp(pod *corev1.Pod, ts time.Time) time.Time { pgName := util.GetPodGroupLabel(pod) @@ -243,51 +232,6 @@ func (pgMgr *PodGroupManager) GetPodGroup(ctx context.Context, pod *corev1.Pod) return fmt.Sprintf("%v/%v", pod.Namespace, pgName), &pg } -// CalculateAssignedPods returns the number of pods that has been assigned nodes: assumed or bound. -func (pgMgr *PodGroupManager) CalculateAssignedPods(podGroupName, namespace string) int { - nodeInfos, err := pgMgr.snapshotSharedLister.NodeInfos().List() - klog.Info(nodeInfos) - if err != nil { - klog.ErrorS(err, "Cannot get nodeInfos from frameworkHandle") - return 0 - } - var count int - for _, nodeInfo := range nodeInfos { - for _, podInfo := range nodeInfo.Pods { - pod := podInfo.Pod - if util.GetPodGroupLabel(pod) == podGroupName && pod.Namespace == namespace && pod.Spec.NodeName != "" { - count++ - } - } - } - - return count -} - -// CheckClusterResource checks if resource capacity of the cluster can satisfy . -// It returns an error detailing the resource gap if not satisfied; otherwise returns nil. -func CheckClusterResource(nodeList []*framework.NodeInfo, resourceRequest corev1.ResourceList, desiredPodGroupName string) error { - for _, info := range nodeList { - if info == nil || info.Node() == nil { - continue - } - - nodeResource := util.ResourceList(getNodeResource(info, desiredPodGroupName)) - for name, quant := range resourceRequest { - quant.Sub(nodeResource[name]) - if quant.Sign() <= 0 { - delete(resourceRequest, name) - continue - } - resourceRequest[name] = quant - } - if len(resourceRequest) == 0 { - return nil - } - } - return fmt.Errorf("resource gap: %v", resourceRequest) -} - // GetNamespacedName returns the namespaced name. func GetNamespacedName(obj metav1.Object) string { return fmt.Sprintf("%v/%v", obj.GetNamespace(), obj.GetName()) diff --git a/sig-scheduler-plugins/pkg/fluence/core/flux.go b/sig-scheduler-plugins/pkg/fluence/core/flux.go new file mode 100644 index 0000000..def239f --- /dev/null +++ b/sig-scheduler-plugins/pkg/fluence/core/flux.go @@ -0,0 +1,259 @@ +package core + +import ( + "context" + "time" + + "google.golang.org/grpc" + "k8s.io/apimachinery/pkg/labels" + klog "k8s.io/klog/v2" + pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" + fgroup "sigs.k8s.io/scheduler-plugins/pkg/fluence/group" + + "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" + "sigs.k8s.io/scheduler-plugins/pkg/fluence/utils" + + corev1 "k8s.io/api/core/v1" +) + +// AskFlux will ask flux for an allocation for nodes for the pod group. +// We return the list of nodes, and assign to the entire group! 
+func (pgMgr *PodGroupManager) AskFlux(
+	ctx context.Context,
+	pod corev1.Pod,
+	pg *v1alpha1.PodGroup,
+	groupName string,
+) ([]string, error) {
+
+	// clean up a previous match if this group was already allocated
+	pgMgr.mutex.Lock()
+	_, isAllocated := pgMgr.groupToJobId[groupName]
+	pgMgr.mutex.Unlock()
+
+	// This case happens when, for some reason, an initial job's pods were partially allocated,
+	// but then the job restarted, and new pods are present but fluence had assigned nodes to
+	// the old ones (and there aren't enough). The job would have had to complete in some way,
+	// and the PodGroup would have to then recreate, and have the same job id (the group name).
+	// This happened when I cancelled a bunch of jobs and they didn't have the chance to
+	// cancel in fluence. What we can do here is assume the previous pods are no longer running
+	// and cancel the flux job to create again.
+	if isAllocated {
+		klog.Infof("Warning - group %s was previously allocated and is requesting again, so must have completed.", groupName)
+		pgMgr.mutex.Lock()
+		pgMgr.cancelFluxJob(groupName, &pod)
+		pgMgr.mutex.Unlock()
+	}
+	nodes := []string{}
+
+	// IMPORTANT: this is a JobSpec for *one* pod, assuming they are all the same.
+	// This obviously may not be true if we have a heterogeneous PodGroup.
+	// We name it based on the group, since it will represent the group
+	jobspec := utils.PreparePodJobSpec(&pod, groupName)
+	klog.Infof("[Fluence] Inspect pod info, jobspec: %s\n", jobspec)
+	conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure())
+
+	// TODO change this to just return fmt.Errorf
+	if err != nil {
+		klog.Errorf("[Fluence] Error connecting to server: %v\n", err)
+		return nodes, err
+	}
+	defer conn.Close()
+
+	grpcclient := pb.NewFluxcliServiceClient(conn)
+	_, cancel := context.WithTimeout(context.Background(), 200*time.Second)
+	defer cancel()
+
+	request := &pb.MatchRequest{
+		Ps:      jobspec,
+		Request: "allocate",
+		Count:   pg.Spec.MinMember,
+	}
+
+	// An error here is an error with making the request
+	r, err := grpcclient.Match(context.Background(), request)
+	if err != nil {
+		klog.Errorf("[Fluence] did not receive any match response: %v\n", err)
+		return nodes, err
+	}
+
+	// TODO GetPodID should be renamed, because it will reflect the group
+	klog.Infof("[Fluence] Match response ID %s\n", r.GetPodID())
+
+	// Get the nodelist and inspect
+	nodelist := r.GetNodelist()
+	for _, node := range nodelist {
+		nodes = append(nodes, node.NodeID)
+	}
+	jobid := uint64(r.GetJobID())
+	klog.Infof("[Fluence] parsed node pods list %s for job id %d\n", nodes, jobid)
+
+	// TODO would be nice to actually be able to ask flux jobs -a to fluence
+	// That way we can verify assignments, etc.
+	pgMgr.mutex.Lock()
+	pgMgr.groupToJobId[groupName] = jobid
+	pgMgr.mutex.Unlock()
+	return nodes, nil
+}
+
+// cancelFluxJob cancels the flux job for a pod group.
+// We assume that the cancelled job also means deleting the pod group +func (pgMgr *PodGroupManager) cancelFluxJob(groupName string, pod *corev1.Pod) error { + + jobid, ok := pgMgr.groupToJobId[groupName] + + // The job was already cancelled by another pod + if !ok { + klog.Infof("[Fluence] Request for cancel of group %s is already complete.", groupName) + return nil + } + klog.Infof("[Fluence] Cancel flux job: %v for group %s", jobid, groupName) + + // This first error is about connecting to the server + conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) + if err != nil { + klog.Errorf("[Fluence] Error connecting to server: %v", err) + return err + } + defer conn.Close() + + grpcclient := pb.NewFluxcliServiceClient(conn) + _, cancel := context.WithTimeout(context.Background(), 200*time.Second) + defer cancel() + + // This error reflects the success or failure of the cancel request + request := &pb.CancelRequest{JobID: int64(jobid)} + res, err := grpcclient.Cancel(context.Background(), request) + if err != nil { + klog.Errorf("[Fluence] did not receive any cancel response: %v", err) + return err + } + klog.Infof("[Fluence] Job cancellation for group %s result: %d", groupName, res.Error) + + // And this error is if the cancel was successful or not + if res.Error == 0 { + klog.Infof("[Fluence] Successful cancel of flux job: %d for group %s", jobid, groupName) + pgMgr.cleanup(pod, groupName) + } else { + klog.Warningf("[Fluence] Failed to cancel flux job %d for group %s", jobid, groupName) + } + return nil +} + +// cleanup deletes the group name from groupToJobId, and pods names from the node lookup +func (pgMgr *PodGroupManager) cleanup(pod *corev1.Pod, groupName string) { + + delete(pgMgr.groupToJobId, groupName) + + // Clean up previous pod->node assignments + pods, err := pgMgr.podLister.Pods(pod.Namespace).List( + labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: groupName}), + ) + // TODO need to handle this / understand why it's the case + if err != nil { + return + } + for _, pod := range pods { + delete(pgMgr.podToNode, pod.Name) + } +} + +// UpdatePod is called on an update, and the old and new object are presented +func (pgMgr *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { + + oldPod := oldObj.(*corev1.Pod) + newPod := newObj.(*corev1.Pod) + + // a pod is updated, get the group + // TODO should we be checking group / size for old vs new? + groupName, pg := pgMgr.GetPodGroup(context.TODO(), oldPod) + + // If PodGroup is nil, still try to look up a faux name + // TODO need to check if this might be problematic + if pg == nil { + pg = fgroup.CreateFakeGroup(oldPod) + groupName = pg.Name + } + + klog.Infof("[Fluence] Processing event for pod %s in group %s from %s to %s", newPod.Name, groupName, oldPod.Status.Phase, newPod.Status.Phase) + + switch newPod.Status.Phase { + case corev1.PodPending: + // in this state we don't know if a pod is going to be running, thus we don't need to update job map + case corev1.PodRunning: + // if a pod is start running, we can add it state to the delta graph if it is scheduled by other scheduler + case corev1.PodSucceeded: + klog.Infof("[Fluence] Pod %s succeeded, Fluence needs to free the resources", newPod.Name) + + pgMgr.mutex.Lock() + defer pgMgr.mutex.Unlock() + + // Do we have the group id in our cache? 
If yes, we haven't deleted the jobid yet + // I am worried here that if some pods are succeeded and others pending, this could + // be a mistake - fluence would schedule it again + _, ok := pgMgr.groupToJobId[groupName] + if ok { + pgMgr.cancelFluxJob(groupName, oldPod) + } else { + klog.Infof("[Fluence] Succeeded pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) + } + + case corev1.PodFailed: + + // a corner case need to be tested, the pod exit code is not 0, can be created with segmentation fault pi test + klog.Warningf("[Fluence] Pod %s in group %s failed, Fluence needs to free the resources", newPod.Name, groupName) + + pgMgr.mutex.Lock() + defer pgMgr.mutex.Unlock() + + _, ok := pgMgr.groupToJobId[groupName] + if ok { + pgMgr.cancelFluxJob(groupName, oldPod) + } else { + klog.Errorf("[Fluence] Failed pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) + } + case corev1.PodUnknown: + // don't know how to deal with it as it's unknown phase + default: + // shouldn't enter this branch + } +} + +// DeletePod handles the delete event handler +func (pgMgr *PodGroupManager) DeletePod(podObj interface{}) { + klog.Info("[Fluence] Delete Pod event handler") + pod := podObj.(*corev1.Pod) + groupName, pg := pgMgr.GetPodGroup(context.TODO(), pod) + + // If PodGroup is nil, still try to look up a faux name + if pg == nil { + pg = fgroup.CreateFakeGroup(pod) + groupName = pg.Name + } + + klog.Infof("[Fluence] Delete pod %s in group %s has status %s", pod.Status.Phase, pod.Name, groupName) + switch pod.Status.Phase { + case corev1.PodSucceeded: + case corev1.PodPending: + klog.Infof("[Fluence] Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) + + pgMgr.mutex.Lock() + defer pgMgr.mutex.Unlock() + + _, ok := pgMgr.groupToJobId[groupName] + if ok { + pgMgr.cancelFluxJob(groupName, pod) + } else { + klog.Infof("[Fluence] Terminating pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) + } + case corev1.PodRunning: + pgMgr.mutex.Lock() + defer pgMgr.mutex.Unlock() + + _, ok := pgMgr.groupToJobId[groupName] + if ok { + pgMgr.cancelFluxJob(groupName, pod) + } else { + klog.Infof("[Fluence] Deleted pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) + } + } +} diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 1ad1fd3..5f9f635 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -22,20 +22,19 @@ import ( "sync" "time" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" + klog "k8s.io/klog/v2" + + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" clientscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/cache" fgroup "sigs.k8s.io/scheduler-plugins/pkg/fluence/group" - label "sigs.k8s.io/scheduler-plugins/pkg/fluence/labels" corev1helpers "k8s.io/component-helpers/scheduling/corev1" - "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/scheduler-plugins/pkg/util" "sigs.k8s.io/scheduler-plugins/apis/config" "sigs.k8s.io/scheduler-plugins/apis/scheduling" @@ -46,25 +45,17 @@ import ( // Fluence schedules pods in a group using Fluxion as a backend // We inherit cosched.Coscheduling to use some of the primary functions type Fluence struct { - mutex sync.Mutex - client 
client.Client - - // Store jobid on the level of a group (which can be a single pod) - groupToJobId map[string]uint64 - + mutex sync.Mutex + client client.Client frameworkHandler framework.Handle pgMgr fcore.Manager scheduleTimeout *time.Duration - pgBackoff *time.Duration } var ( - _ framework.QueueSortPlugin = &Fluence{} - _ framework.PreFilterPlugin = &Fluence{} - _ framework.PostFilterPlugin = &Fluence{} // Here down are from coscheduling - _ framework.PermitPlugin = &Fluence{} - _ framework.ReservePlugin = &Fluence{} - _ framework.EnqueueExtensions = &Fluence{} + _ framework.QueueSortPlugin = &Fluence{} + _ framework.PreFilterPlugin = &Fluence{} + _ framework.FilterPlugin = &Fluence{} ) const ( @@ -77,10 +68,11 @@ func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) // Keep these empty for now, use defaults args := config.CoschedulingArgs{} + ctx := context.TODO() scheme := runtime.NewScheme() _ = clientscheme.AddToScheme(scheme) - _ = v1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) _ = v1alpha1.AddToScheme(scheme) client, err := client.New(handle.KubeConfig(), client.Options{Scheme: scheme}) @@ -89,7 +81,8 @@ func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) } // Performance improvement when retrieving list of objects by namespace or we'll log 'index not exist' warning. - handle.SharedInformerFactory().Core().V1().Pods().Informer().AddIndexers(cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) + fluxPodsInformer := handle.SharedInformerFactory().Core().V1().Pods().Informer() + fluxPodsInformer.AddIndexers(cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) // PermitWaitingTimeSeconds is the waiting timeout in seconds. scheduleTimeDuration := time.Duration(args.PermitWaitingTimeSeconds) * time.Second @@ -101,22 +94,17 @@ func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) handle.SharedInformerFactory().Core().V1().Pods(), ) - // The main difference here is adding the groupToJobId lookup + // Event handlers to call on pgMgr + fluxPodsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + UpdateFunc: pgMgr.UpdatePod, + DeleteFunc: pgMgr.DeletePod, + }) + go fluxPodsInformer.Run(ctx.Done()) + plugin := &Fluence{ frameworkHandler: handle, pgMgr: pgMgr, scheduleTimeout: &scheduleTimeDuration, - groupToJobId: make(map[string]uint64), - } - - // PodGroupBackoffSeconds: backoff time in seconds before a pod group can be scheduled again. - if args.PodGroupBackoffSeconds < 0 { - err := fmt.Errorf("parse arguments failed") - klog.ErrorS(err, "PodGroupBackoffSeconds cannot be negative") - return nil, err - } else if args.PodGroupBackoffSeconds > 0 { - pgBackoff := time.Duration(args.PodGroupBackoffSeconds) * time.Second - plugin.pgBackoff = &pgBackoff } return plugin, nil } @@ -128,6 +116,7 @@ func (f *Fluence) Name() string { // Fluence has added delete, although I wonder if update includes that signal // and it's redundant? 
func (f *Fluence) EventsToRegister() []framework.ClusterEventWithHint { + // TODO I have not redone this yet, not sure what it does (it might replace our informer above) // To register a custom event, follow the naming convention at: // https://git.k8s.io/kubernetes/pkg/scheduler/eventhandlers.go#L403-L410 pgGVK := fmt.Sprintf("podgroups.v1alpha1.%v", scheduling.GroupName) @@ -137,6 +126,33 @@ func (f *Fluence) EventsToRegister() []framework.ClusterEventWithHint { } } +// TODO we need to account for affinity here +func (f *Fluence) Filter( + ctx context.Context, + cycleState *framework.CycleState, + pod *corev1.Pod, + nodeInfo *framework.NodeInfo, +) *framework.Status { + + klog.Info("Filtering input node ", nodeInfo.Node().Name) + state, err := cycleState.Read(framework.StateKey(pod.Name)) + + // No error means we retrieved the state + if err == nil { + + // Try to convert the state to FluxStateDate + value, ok := state.(*fcore.FluxStateData) + + // If we have state data that isn't equal to the current assignment, no go + if ok && value.NodeName != nodeInfo.Node().Name { + return framework.NewStatus(framework.Unschedulable, "pod is not permitted") + } else { + klog.Infof("Filter: node %s selected for %s\n", value.NodeName, pod.Name) + } + } + return framework.NewStatus(framework.Success) +} + // Less is used to sort pods in the scheduling queue in the following order. // 1. Compare the priorities of Pods. // 2. Compare the initialization timestamps of PodGroups or Pods. @@ -169,125 +185,37 @@ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { } -// PreFilter performs the following validations. -// 1. Whether the PodGroup that the Pod belongs to is on the deny list. -// 2. Whether the total number of pods in a PodGroup is less than its `minMember`. -func (f *Fluence) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) { - // If PreFilter fails, return framework.UnschedulableAndUnresolvable to avoid - // any preemption attempts. - if err := f.pgMgr.PreFilter(ctx, pod); err != nil { - klog.ErrorS(err, "PreFilter failed", "pod", klog.KObj(pod)) - return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error()) - } - return nil, framework.NewStatus(framework.Success, "") -} - -// PostFilter is used to reject a group of pods if a pod does not pass PreFilter or Filter. -func (f *Fluence) PostFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, - filteredNodeStatusMap framework.NodeToStatusMap) (*framework.PostFilterResult, *framework.Status) { - pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) - if pg == nil { - klog.V(4).InfoS("Pod does not belong to any group", "pod", klog.KObj(pod)) - return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, "can not find pod group") - } - - // This indicates there are already enough Pods satisfying the PodGroup, - // so don't bother to reject the whole PodGroup. 
- assigned := f.pgMgr.CalculateAssignedPods(pg.Name, pod.Namespace) - if assigned >= int(pg.Spec.MinMember) { - klog.V(4).InfoS("Assigned pods", "podGroup", klog.KObj(pg), "assigned", assigned) - return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable) - } - - // If the gap is less than/equal 10%, we may want to try subsequent Pods - // to see they can satisfy the PodGroup - notAssignedPercentage := float32(int(pg.Spec.MinMember)-assigned) / float32(pg.Spec.MinMember) - if notAssignedPercentage <= 0.1 { - klog.V(4).InfoS("A small gap of pods to reach the quorum", "podGroup", klog.KObj(pg), "percentage", notAssignedPercentage) - return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable) - } - - // It's based on an implicit assumption: if the nth Pod failed, - // it's inferrable other Pods belonging to the same PodGroup would be very likely to fail. - f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { - if waitingPod.GetPod().Namespace == pod.Namespace && label.GetPodGroupLabel(waitingPod.GetPod()) == pg.Name { - klog.V(3).InfoS("PostFilter rejects the pod", "podGroup", klog.KObj(pg), "pod", klog.KObj(waitingPod.GetPod())) - waitingPod.Reject(f.Name(), "optimistic rejection in PostFilter") - } - }) - - if f.pgBackoff != nil { - pods, err := f.frameworkHandler.SharedInformerFactory().Core().V1().Pods().Lister().Pods(pod.Namespace).List( - labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: label.GetPodGroupLabel(pod)}), - ) - if err == nil && len(pods) >= int(pg.Spec.MinMember) { - f.pgMgr.BackoffPodGroup(pgName, *f.pgBackoff) - } - } - - f.pgMgr.DeletePermittedPodGroup(pgName) - return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, - fmt.Sprintf("PodGroup %v gets rejected due to Pod %v is unschedulable even after PostFilter", pgName, pod.Name)) -} - -// PreFilterExtensions returns a PreFilterExtensions interface if the plugin implements one. +// PreFilterExtensions allow for callbacks on filtered states +// This is required to be defined for a PreFilter plugin +// https://github.com/kubernetes/kubernetes/blob/master/pkg/scheduler/framework/interface.go#L383 func (f *Fluence) PreFilterExtensions() framework.PreFilterExtensions { return nil } -// Permit is the functions invoked by the framework at "Permit" extension point. -func (f *Fluence) Permit(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (*framework.Status, time.Duration) { - waitTime := *f.scheduleTimeout - s := f.pgMgr.Permit(ctx, pod) - var retStatus *framework.Status - switch s { - case fcore.PodGroupNotSpecified: - return framework.NewStatus(framework.Success, ""), 0 - case fcore.PodGroupNotFound: - return framework.NewStatus(framework.Unschedulable, "PodGroup not found"), 0 - case fcore.Wait: - klog.InfoS("Pod is waiting to be scheduled to node", "pod", klog.KObj(pod), "nodeName", nodeName) - _, pg := f.pgMgr.GetPodGroup(ctx, pod) - - // Note this is in seconds, defaults to 60 seconds - if wait := util.GetWaitTimeDuration(pg, f.scheduleTimeout); wait != 0 { - waitTime = wait - } - retStatus = framework.NewStatus(framework.Wait) - // We will also request to move the sibling pods back to activeQ. 
- f.pgMgr.ActivateSiblings(pod, state) - case fcore.Success: - pgFullName := label.GetPodGroupFullName(pod) - f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { - if label.GetPodGroupFullName(waitingPod.GetPod()) == pgFullName { - klog.V(3).InfoS("Permit allows", "pod", klog.KObj(waitingPod.GetPod())) - waitingPod.Allow(f.Name()) - } - }) - klog.V(3).InfoS("Permit allows", "pod", klog.KObj(pod)) - retStatus = framework.NewStatus(framework.Success) - waitTime = 0 +// PreFilter performs the following validations. +// 1. Whether the PodGroup that the Pod belongs to is on the deny list. +// 2. Whether the total number of pods in a PodGroup is less than its `minMember`. +func (f *Fluence) PreFilter( + ctx context.Context, + state *framework.CycleState, + pod *corev1.Pod, +) (*framework.PreFilterResult, *framework.Status) { + + // Quick check if the pod is already scheduled + f.mutex.Lock() + node := f.pgMgr.GetPodNode(pod) + f.mutex.Unlock() + if node != "" { + result := framework.PreFilterResult{NodeNames: sets.New(node)} + return &result, framework.NewStatus(framework.Success, "") } - - return retStatus, waitTime -} - -// Reserve is the functions invoked by the framework at "reserve" extension point. -func (f *Fluence) Reserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { - return nil -} - -// Unreserve rejects all other Pods in the PodGroup when one of the pods in the group times out. -func (f *Fluence) Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) { - pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) - if pg == nil { - return + // This will populate the node name into the pod group manager + err := f.pgMgr.PreFilter(ctx, pod, state) + if err != nil { + klog.ErrorS(err, "PreFilter failed", "pod", klog.KObj(pod)) + return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error()) } - f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { - if waitingPod.GetPod().Namespace == pod.Namespace && label.GetPodGroupLabel(waitingPod.GetPod()) == pg.Name { - klog.V(3).InfoS("Unreserve rejects", "pod", klog.KObj(waitingPod.GetPod()), "podGroup", klog.KObj(pg)) - waitingPod.Reject(f.Name(), "rejection in Unreserve") - } - }) - f.pgMgr.DeletePermittedPodGroup(pgName) + node = f.pgMgr.GetPodNode(pod) + result := framework.PreFilterResult{NodeNames: sets.New(node)} + return &result, framework.NewStatus(framework.Success, "") } From 5a86a239362b1b3777f5194b7565969b27ac6565 Mon Sep 17 00:00:00 2001 From: vsoch Date: Thu, 14 Mar 2024 20:43:53 -0600 Subject: [PATCH 23/28] feat: add small logger just for fluence Problem: it is really hard using klog and parses through messy multi-threaded logs Solution: make a little (likely temporary) filesystem logger for a single place of truth! 
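As a sketch of the idea only (the actual sig-scheduler-plugins/pkg/logger added below has levels and a NewDebugLogger constructor; the names here are simplified), the single-file logger boils down to appending formatted lines to one file on every call:

```go
package main

import (
	"fmt"
	"os"
	"time"
)

// DebugLogger appends formatted messages to a single file so output from
// concurrent scheduler goroutines ends up in one place. Simplified sketch.
type DebugLogger struct {
	Filename string
}

// Info opens the file in append mode, writes one timestamped line, and closes
// it again: slow, but simple enough for debugging.
func (l *DebugLogger) Info(format string, args ...interface{}) error {
	f, err := os.OpenFile(l.Filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}
	defer f.Close()
	msg := fmt.Sprintf(format, args...)
	_, err = fmt.Fprintf(f, "%s INFO %s\n", time.Now().Format(time.RFC3339), msg)
	return err
}

func main() {
	log := DebugLogger{Filename: "/tmp/fluence.log"}
	_ = log.Info("Node selected %s for group %s", "node-a", "lammps-group")
}
```

Opening and appending on each call is deliberately simple; it is not fast, but it keeps all of fluence's output in one /tmp/fluence.log that can be read out of the scheduler container while debugging.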
Signed-off-by: vsoch --- Makefile | 2 + README.md | 8 + .../pkg/controllers/podgroup_controller.go | 14 + .../pkg/fluence/core/core.go | 62 ++--- .../pkg/fluence/core/flux.go | 46 ++-- sig-scheduler-plugins/pkg/fluence/fluence.go | 23 +- sig-scheduler-plugins/pkg/fluence/register.go | 55 ++++ sig-scheduler-plugins/pkg/logger/logger.go | 88 ++++++ src/fluence/utils/utils.go | 256 +++++++++++------- 9 files changed, 388 insertions(+), 166 deletions(-) create mode 100644 sig-scheduler-plugins/pkg/fluence/register.go create mode 100644 sig-scheduler-plugins/pkg/logger/logger.go diff --git a/Makefile b/Makefile index 91789d8..d051a0e 100644 --- a/Makefile +++ b/Makefile @@ -26,9 +26,11 @@ update: clone prepare: clone # These are entirely new directory structures rm -rf $(CLONE_UPSTREAM)/pkg/fluence + rm -rf $(CLONE_UPSTREAM)/pkg/logger # rm -rf $(CLONE_UPSTREAM)/cmd/app rm -rf $(CLONE_UPSTREAM)/pkg/controllers/podgroup_controller.go rm -rf $(CLONE_UPSTREAM)/cmd/controller/app/server.go + cp -R sig-scheduler-plugins/pkg/logger $(CLONE_UPSTREAM)/pkg/logger cp -R sig-scheduler-plugins/pkg/fluence $(CLONE_UPSTREAM)/pkg/fluence cp -R sig-scheduler-plugins/pkg/controllers/* $(CLONE_UPSTREAM)/pkg/controllers/ # This is the one exception not from sig-scheduler-plugins because it is needed in both spots diff --git a/README.md b/README.md index 8c51de7..e3e1214 100644 --- a/README.md +++ b/README.md @@ -509,6 +509,14 @@ The last step ensures we use the images we loaded! You can basically just do: This sped up my development time immensely. If you want to manually do the steps, see that script for instructions. +#### Logging + +For easier viewing of what fluence is doing (in the sig-scheduler-plugins) we have a file logger that can be seen in the container: + +```bash +$ kubectl exec -it fluence-68c4c586c6-nktdl -c scheduler-plugins-scheduler -- cat /tmp/fluence.log +``` + ##### kubectl plugin Note that if you want to enable extra endpoints for the fluence kubectl plugin and expose the GRPC as a service, you can do: diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go index 27c31cb..a2fd4a6 100644 --- a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go +++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go @@ -405,6 +405,8 @@ func (r *PodGroupReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } +// ensurePodGroup ensures we create the pod group (or delete) when pod is deleted +// for delete, this would be better done as an owner reference., but I haven't gotten it working func (r *PodGroupReconciler) ensurePodGroup(ctx context.Context, obj client.Object) []ctrl.Request { pod, ok := obj.(*v1.Pod) if !ok { @@ -418,6 +420,18 @@ func (r *PodGroupReconciler) ensurePodGroup(ctx context.Context, obj client.Obje return nil } + // If we deleted the pod... assume we delete the group too + if !pod.ObjectMeta.DeletionTimestamp.IsZero() { + r.log.Info("Pod: ", "Name", pod.Name, "Status", pod.Status.Phase, "Action", "Deleted") + + pg := &schedv1alpha1.PodGroup{} + err := r.Get(ctx, types.NamespacedName{Name: groupName, Namespace: pod.Namespace}, pg) + if err != nil { + r.Delete(ctx, pg) + } + return nil + } + // If we are watching the Pod and it's beyond pending, we hopefully already made a group // and that group should be in the reconcile process. 
if pod.Status.Phase != v1.PodPending { diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index eed9536..8b08468 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -35,6 +35,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" + "sigs.k8s.io/scheduler-plugins/pkg/logger" "sigs.k8s.io/scheduler-plugins/pkg/util" ) @@ -84,10 +85,17 @@ type PodGroupManager struct { // Probably should just choose one... oh well sync.RWMutex mutex sync.Mutex + log *logger.DebugLogger } // NewPodGroupManager creates a new operation object. -func NewPodGroupManager(client client.Client, snapshotSharedLister framework.SharedLister, scheduleTimeout *time.Duration, podInformer informerv1.PodInformer) *PodGroupManager { +func NewPodGroupManager( + client client.Client, + snapshotSharedLister framework.SharedLister, + scheduleTimeout *time.Duration, + podInformer informerv1.PodInformer, + log *logger.DebugLogger, +) *PodGroupManager { pgMgr := &PodGroupManager{ client: client, snapshotSharedLister: snapshotSharedLister, @@ -97,6 +105,7 @@ func NewPodGroupManager(client client.Client, snapshotSharedLister framework.Sha backedOffPG: gochache.New(10*time.Second, 10*time.Second), groupToJobId: map[string]uint64{}, podToNode: map[string]string{}, + log: log, } return pgMgr } @@ -126,13 +135,14 @@ func (pgMgr *PodGroupManager) PreFilter( state *framework.CycleState, ) error { - klog.V(5).InfoS("Pre-filter", "pod", klog.KObj(pod)) + pgMgr.log.Info("[PodGroup PreFilter] pod %s", klog.KObj(pod)) pgFullName, pg := pgMgr.GetPodGroup(ctx, pod) if pg == nil { return nil } - if _, exist := pgMgr.backedOffPG.Get(pgFullName); exist { + _, exist := pgMgr.backedOffPG.Get(pgFullName) + if exist { return fmt.Errorf("podGroup %v failed recently", pgFullName) } @@ -147,7 +157,7 @@ func (pgMgr *PodGroupManager) PreFilter( statuses := pgMgr.GetStatuses(pods) // This shows us the number of pods we have in the set and their states - klog.Infof("Fluence Pre-filter", "group", pgFullName, "pods", statuses, "MinMember", pg.Spec.MinMember, "Size", len(pods)) + pgMgr.log.Info("[PodGroup PreFilter] group: %s pods: %s MinMember: %d Size: %d", pgFullName, statuses, pg.Spec.MinMember, len(pods)) if len(pods) < int(pg.Spec.MinMember) { return fmt.Errorf("pre-filter pod %v cannot find enough sibling pods, "+ "current pods number: %v, minMember of group: %v", pod.Name, len(pods), pg.Spec.MinMember) @@ -164,7 +174,8 @@ func (pgMgr *PodGroupManager) PreFilter( // TODO(cwdsuzhou): This resource check may not always pre-catch unschedulable pod group. // It only tries to PreFilter resource constraints so even if a PodGroup passed here, // it may not necessarily pass Filter due to other constraints such as affinity/taints. - if _, ok := pgMgr.permittedPG.Get(pgFullName); ok { + _, ok := pgMgr.permittedPG.Get(pgFullName) + if ok { return nil } @@ -173,14 +184,14 @@ func (pgMgr *PodGroupManager) PreFilter( repPod := pods[0] nodes, err := pgMgr.AskFlux(ctx, *repPod, pg, pgFullName) if err != nil { - klog.Infof("[Fluence] Fluxion returned an error %s, not schedulable", err.Error()) + pgMgr.log.Info("[PodGroup PreFilter] Fluxion returned an error %s, not schedulable", err.Error()) return err } - klog.Infof("Node Selected %s (pod group %s)", nodes, pgFullName) + pgMgr.log.Info("Node Selected %s (pod group %s)", nodes, pgFullName) // Some reason fluxion gave us the wrong size? 
if len(nodes) != len(pods) { - klog.Info("Warning - group %s needs %d nodes but Fluxion returned the wrong number nodes %d.", pgFullName, len(pods), len(nodes)) + pgMgr.log.Warning("[PodGroup PreFilter] group %s needs %d nodes but Fluxion returned the wrong number nodes %d.", pgFullName, len(pods), len(nodes)) pgMgr.mutex.Lock() pgMgr.cancelFluxJob(pgFullName, repPod) pgMgr.mutex.Unlock() @@ -236,38 +247,3 @@ func (pgMgr *PodGroupManager) GetPodGroup(ctx context.Context, pod *corev1.Pod) func GetNamespacedName(obj metav1.Object) string { return fmt.Sprintf("%v/%v", obj.GetNamespace(), obj.GetName()) } - -func getNodeResource(info *framework.NodeInfo, desiredPodGroupName string) *framework.Resource { - nodeClone := info.Clone() - for _, podInfo := range info.Pods { - if podInfo == nil || podInfo.Pod == nil { - continue - } - if util.GetPodGroupFullName(podInfo.Pod) != desiredPodGroupName { - continue - } - nodeClone.RemovePod(podInfo.Pod) - } - - leftResource := framework.Resource{ - ScalarResources: make(map[corev1.ResourceName]int64), - } - allocatable := nodeClone.Allocatable - requested := nodeClone.Requested - - leftResource.AllowedPodNumber = allocatable.AllowedPodNumber - len(nodeClone.Pods) - leftResource.MilliCPU = allocatable.MilliCPU - requested.MilliCPU - leftResource.Memory = allocatable.Memory - requested.Memory - leftResource.EphemeralStorage = allocatable.EphemeralStorage - requested.EphemeralStorage - - for k, allocatableEx := range allocatable.ScalarResources { - requestEx, ok := requested.ScalarResources[k] - if !ok { - leftResource.ScalarResources[k] = allocatableEx - } else { - leftResource.ScalarResources[k] = allocatableEx - requestEx - } - } - klog.V(4).InfoS("Node left resource", "node", klog.KObj(info.Node()), "resource", leftResource) - return &leftResource -} diff --git a/sig-scheduler-plugins/pkg/fluence/core/flux.go b/sig-scheduler-plugins/pkg/fluence/core/flux.go index def239f..48e1500 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/flux.go +++ b/sig-scheduler-plugins/pkg/fluence/core/flux.go @@ -6,7 +6,6 @@ import ( "google.golang.org/grpc" "k8s.io/apimachinery/pkg/labels" - klog "k8s.io/klog/v2" pb "sigs.k8s.io/scheduler-plugins/pkg/fluence/fluxcli-grpc" fgroup "sigs.k8s.io/scheduler-plugins/pkg/fluence/group" @@ -38,7 +37,7 @@ func (pgMgr *PodGroupManager) AskFlux( // cancel in fluence. What we can do here is assume the previous pods are no longer running // and cancel the flux job to create again. if isAllocated { - klog.Info("Warning - group %s was previously allocated and is requesting again, so must have completed.", groupName) + pgMgr.log.Warning("[PodGroup AskFlux] group %s was previously allocated and is requesting again, so must have completed.", groupName) pgMgr.mutex.Lock() pgMgr.cancelFluxJob(groupName, &pod) pgMgr.mutex.Unlock() @@ -49,12 +48,12 @@ func (pgMgr *PodGroupManager) AskFlux( // This obviously may not be true if we have a hetereogenous PodGroup. 
// We name it based on the group, since it will represent the group jobspec := utils.PreparePodJobSpec(&pod, groupName) - klog.Infof("[Fluence] Inspect pod info, jobspec: %s\n", jobspec) + pgMgr.log.Info("[PodGroup AskFlux] Inspect pod info, jobspec: %s\n", jobspec) conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) // TODO change this to just return fmt.Errorf if err != nil { - klog.Errorf("[Fluence] Error connecting to server: %v\n", err) + pgMgr.log.Error("[PodGroup AskFlux] Error connecting to server: %v\n", err) return nodes, err } defer conn.Close() @@ -72,12 +71,12 @@ func (pgMgr *PodGroupManager) AskFlux( // An error here is an error with making the request r, err := grpcclient.Match(context.Background(), request) if err != nil { - klog.Errorf("[Fluence] did not receive any match response: %v\n", err) + pgMgr.log.Warning("[PodGroup AskFlux] did not receive any match response: %v\n", err) return nodes, err } // TODO GetPodID should be renamed, because it will reflect the group - klog.Infof("[Fluence] Match response ID %s\n", r.GetPodID()) + pgMgr.log.Info("[PodGroup AskFlux] Match response ID %s\n", r.GetPodID()) // Get the nodelist and inspect nodelist := r.GetNodelist() @@ -85,7 +84,7 @@ func (pgMgr *PodGroupManager) AskFlux( nodes = append(nodes, node.NodeID) } jobid := uint64(r.GetJobID()) - klog.Infof("[Fluence] parsed node pods list %s for job id %d\n", nodes, jobid) + pgMgr.log.Info("[PodGroup AskFlux] parsed node pods list %s for job id %d\n", nodes, jobid) // TODO would be nice to actually be able to ask flux jobs -a to fluence // That way we can verify assignments, etc. @@ -103,15 +102,15 @@ func (pgMgr *PodGroupManager) cancelFluxJob(groupName string, pod *corev1.Pod) e // The job was already cancelled by another pod if !ok { - klog.Infof("[Fluence] Request for cancel of group %s is already complete.", groupName) + pgMgr.log.Info("[PodGroup cancelFluxJob] Request for cancel of group %s is already complete.", groupName) return nil } - klog.Infof("[Fluence] Cancel flux job: %v for group %s", jobid, groupName) + pgMgr.log.Info("[PodGroup cancelFluxJob] Cancel flux job: %v for group %s", jobid, groupName) // This first error is about connecting to the server conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) if err != nil { - klog.Errorf("[Fluence] Error connecting to server: %v", err) + pgMgr.log.Error("[PodGroup cancelFluxJob] Error connecting to server: %v", err) return err } defer conn.Close() @@ -124,17 +123,17 @@ func (pgMgr *PodGroupManager) cancelFluxJob(groupName string, pod *corev1.Pod) e request := &pb.CancelRequest{JobID: int64(jobid)} res, err := grpcclient.Cancel(context.Background(), request) if err != nil { - klog.Errorf("[Fluence] did not receive any cancel response: %v", err) + pgMgr.log.Error("[PodGroup cancelFluxJob] did not receive any cancel response: %v", err) return err } - klog.Infof("[Fluence] Job cancellation for group %s result: %d", groupName, res.Error) + pgMgr.log.Info("[PodGroup cancelFluxJob] Job cancellation for group %s result: %d", groupName, res.Error) // And this error is if the cancel was successful or not if res.Error == 0 { - klog.Infof("[Fluence] Successful cancel of flux job: %d for group %s", jobid, groupName) + pgMgr.log.Info("[PodGroup cancelFluxJob] Successful cancel of flux job: %d for group %s", jobid, groupName) pgMgr.cleanup(pod, groupName) } else { - klog.Warningf("[Fluence] Failed to cancel flux job %d for group %s", jobid, groupName) + pgMgr.log.Warning("[PodGroup cancelFluxJob] Failed to cancel flux 
job %d for group %s", jobid, groupName) } return nil } @@ -174,7 +173,7 @@ func (pgMgr *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { groupName = pg.Name } - klog.Infof("[Fluence] Processing event for pod %s in group %s from %s to %s", newPod.Name, groupName, oldPod.Status.Phase, newPod.Status.Phase) + pgMgr.log.Verbose("[PodGroup UpdatePod] Processing event for pod %s in group %s from %s to %s", newPod.Name, groupName, oldPod.Status.Phase, newPod.Status.Phase) switch newPod.Status.Phase { case corev1.PodPending: @@ -182,7 +181,7 @@ func (pgMgr *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { case corev1.PodRunning: // if a pod is start running, we can add it state to the delta graph if it is scheduled by other scheduler case corev1.PodSucceeded: - klog.Infof("[Fluence] Pod %s succeeded, Fluence needs to free the resources", newPod.Name) + pgMgr.log.Info("[PodGroup UpdatePod] Pod %s succeeded, Fluence needs to free the resources", newPod.Name) pgMgr.mutex.Lock() defer pgMgr.mutex.Unlock() @@ -194,13 +193,13 @@ func (pgMgr *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { if ok { pgMgr.cancelFluxJob(groupName, oldPod) } else { - klog.Infof("[Fluence] Succeeded pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) + pgMgr.log.Verbose("[PodGroup UpdatePod] Succeeded pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) } case corev1.PodFailed: // a corner case need to be tested, the pod exit code is not 0, can be created with segmentation fault pi test - klog.Warningf("[Fluence] Pod %s in group %s failed, Fluence needs to free the resources", newPod.Name, groupName) + pgMgr.log.Warning("[PodGroup UpdatePod] Pod %s in group %s failed, Fluence needs to free the resources", newPod.Name, groupName) pgMgr.mutex.Lock() defer pgMgr.mutex.Unlock() @@ -209,7 +208,7 @@ func (pgMgr *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { if ok { pgMgr.cancelFluxJob(groupName, oldPod) } else { - klog.Errorf("[Fluence] Failed pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) + pgMgr.log.Error("[PodGroup UpdatePod] Failed pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) } case corev1.PodUnknown: // don't know how to deal with it as it's unknown phase @@ -220,7 +219,6 @@ func (pgMgr *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { // DeletePod handles the delete event handler func (pgMgr *PodGroupManager) DeletePod(podObj interface{}) { - klog.Info("[Fluence] Delete Pod event handler") pod := podObj.(*corev1.Pod) groupName, pg := pgMgr.GetPodGroup(context.TODO(), pod) @@ -230,11 +228,11 @@ func (pgMgr *PodGroupManager) DeletePod(podObj interface{}) { groupName = pg.Name } - klog.Infof("[Fluence] Delete pod %s in group %s has status %s", pod.Status.Phase, pod.Name, groupName) + pgMgr.log.Verbose("[PodGroup DeletePod] Delete pod %s in group %s has status %s", pod.Status.Phase, pod.Name, groupName) switch pod.Status.Phase { case corev1.PodSucceeded: case corev1.PodPending: - klog.Infof("[Fluence] Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) + pgMgr.log.Verbose("[PodGroup DeletePod] Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) pgMgr.mutex.Lock() defer pgMgr.mutex.Unlock() @@ -243,7 +241,7 @@ func (pgMgr *PodGroupManager) DeletePod(podObj interface{}) { if ok { pgMgr.cancelFluxJob(groupName, pod) } else { - klog.Infof("[Fluence] 
Terminating pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) + pgMgr.log.Info("[PodGroup DeletePod] Terminating pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) } case corev1.PodRunning: pgMgr.mutex.Lock() @@ -253,7 +251,7 @@ func (pgMgr *PodGroupManager) DeletePod(podObj interface{}) { if ok { pgMgr.cancelFluxJob(groupName, pod) } else { - klog.Infof("[Fluence] Deleted pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) + pgMgr.log.Info("[PodGroup DeletePod] Deleted pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) } } } diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 5f9f635..84f3e95 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -25,6 +25,8 @@ import ( "k8s.io/apimachinery/pkg/util/sets" klog "k8s.io/klog/v2" + "sigs.k8s.io/scheduler-plugins/pkg/logger" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" clientscheme "k8s.io/client-go/kubernetes/scheme" @@ -50,6 +52,7 @@ type Fluence struct { frameworkHandler framework.Handle pgMgr fcore.Manager scheduleTimeout *time.Duration + log *logger.DebugLogger } var ( @@ -70,6 +73,11 @@ func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) args := config.CoschedulingArgs{} ctx := context.TODO() + // Make fluence his own little logger! + // This can eventually be a flag, but just going to set for now + // It shall be a very chonky file. Oh lawd he comin! + l := logger.NewDebugLogger(logger.LevelError, "/tmp/fluence.log") + scheme := runtime.NewScheme() _ = clientscheme.AddToScheme(scheme) _ = corev1.AddToScheme(scheme) @@ -92,6 +100,7 @@ func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) &scheduleTimeDuration, // Keep the podInformer (from frameworkHandle) as the single source of Pods. handle.SharedInformerFactory().Core().V1().Pods(), + l, ) // Event handlers to call on pgMgr @@ -105,8 +114,13 @@ func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) frameworkHandler: handle, pgMgr: pgMgr, scheduleTimeout: &scheduleTimeDuration, + log: l, } - return plugin, nil + + // TODO this is not supported yet + // Account for resources in running cluster + err = plugin.RegisterExisting(ctx) + return plugin, err } func (f *Fluence) Name() string { @@ -134,7 +148,7 @@ func (f *Fluence) Filter( nodeInfo *framework.NodeInfo, ) *framework.Status { - klog.Info("Filtering input node ", nodeInfo.Node().Name) + f.log.Verbose("[Fluence Filter] Filtering input node %s", nodeInfo.Node().Name) state, err := cycleState.Read(framework.StateKey(pod.Name)) // No error means we retrieved the state @@ -147,7 +161,7 @@ func (f *Fluence) Filter( if ok && value.NodeName != nodeInfo.Node().Name { return framework.NewStatus(framework.Unschedulable, "pod is not permitted") } else { - klog.Infof("Filter: node %s selected for %s\n", value.NodeName, pod.Name) + f.log.Info("[Fluence Filter] node %s selected for %s\n", value.NodeName, pod.Name) } } return framework.NewStatus(framework.Success) @@ -158,7 +172,6 @@ func (f *Fluence) Filter( // 2. Compare the initialization timestamps of PodGroups or Pods. // 3. Compare the keys of PodGroups/Pods: /. 
func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { - klog.Infof("ordering pods in fluence scheduler plugin") prio1 := corev1helpers.PodPriority(podInfo1.Pod) prio2 := corev1helpers.PodPriority(podInfo2.Pod) if prio1 != prio2 { @@ -212,7 +225,7 @@ func (f *Fluence) PreFilter( // This will populate the node name into the pod group manager err := f.pgMgr.PreFilter(ctx, pod, state) if err != nil { - klog.ErrorS(err, "PreFilter failed", "pod", klog.KObj(pod)) + f.log.Error("[Fluence PreFilter] failed pod %s: %s", klog.KObj(pod), err.Error()) return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error()) } node = f.pgMgr.GetPodNode(pod) diff --git a/sig-scheduler-plugins/pkg/fluence/register.go b/sig-scheduler-plugins/pkg/fluence/register.go new file mode 100644 index 0000000..8f39f09 --- /dev/null +++ b/sig-scheduler-plugins/pkg/fluence/register.go @@ -0,0 +1,55 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package fluence + +import ( + "context" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// RegisterExisting uses the in cluster API to ensure existing pods +// are known to fluence, This is a one-time, static approach, so if a resource +// here goes away we cannot remove it from being known. But it's better than +// not having it, and having fluxion assume more resources than the +// cluster has available. This is a TODO as fluxion does not support it +func (f *Fluence) RegisterExisting(ctx context.Context) error { + + // creates an in-cluster config and client + config, err := rest.InClusterConfig() + if err != nil { + f.log.Error("[Fluence RegisterExisting] Error creating in-cluster config: %s\n", err) + return err + } + // creates the clientset + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + f.log.Error("[Fluence RegisterExisting] Error creating client for config: %s\n", err) + return err + } + // get pods in all the namespaces by omitting namespace + // Or specify namespace to get pods in particular namespace + pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{}) + if err != nil { + f.log.Info("[Fluence RegisterExisting] Error listing pods: %s\n", err) + return err + } + f.log.Info("[Fluence RegisterExisting] Found %d existing pods in the cluster\n", len(pods.Items)) + return nil +} diff --git a/sig-scheduler-plugins/pkg/logger/logger.go b/sig-scheduler-plugins/pkg/logger/logger.go new file mode 100644 index 0000000..522be61 --- /dev/null +++ b/sig-scheduler-plugins/pkg/logger/logger.go @@ -0,0 +1,88 @@ +package logger + +// A small debug logger to write to file instead of klog +// I don't know where to close, so I'm opening and appending each time +// It's a bad design, but will work for debugging. 
+ +import ( + "fmt" + "log" + "os" +) + +const ( + LevelNone = iota + LevelInfo + LevelWarning + LevelError + LevelVerbose + LevelDebug +) + +// TODO try saving state here when we can close +type DebugLogger struct { + level int + Filename string + handle *os.File +} + +func NewDebugLogger(level int, filename string) *DebugLogger { + return &DebugLogger{ + level: LevelNone, + Filename: filename, + } +} + +func (l *DebugLogger) Start() (*log.Logger, error) { + f, err := os.OpenFile(l.Filename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, os.ModePerm) + if err != nil { + return nil, err + } + logger := log.New(f, "", 0) + l.handle = f + return logger, nil +} +func (l *DebugLogger) Stop() error { + if l.handle != nil { + return l.handle.Close() + } + return nil +} + +// Logging functions you should use! +func (l *DebugLogger) Info(message ...any) error { + return l.log(LevelInfo, " INFO: ", message...) +} +func (l *DebugLogger) Error(message ...any) error { + return l.log(LevelError, " ERROR: ", message...) +} +func (l *DebugLogger) Debug(message ...any) error { + return l.log(LevelDebug, " DEBUG: ", message...) +} +func (l *DebugLogger) Verbose(message ...any) error { + return l.log(LevelVerbose, "VERBOSE: ", message...) +} +func (l *DebugLogger) Warning(message ...any) error { + return l.log(LevelWarning, "WARNING: ", message...) +} + +// log is the shared class function for actually printing to the log +func (l *DebugLogger) log(level int, prefix string, message ...any) error { + logger, err := l.Start() + if err != nil { + return err + } + // Assume the prolog (to be formatted) is at index 0 + prolog := message[0].(string) + if prefix != "" { + prolog = prefix + " " + prolog + } + rest := message[1:] + + // msg := fmt.Sprintf(message...) + fmt.Printf("Compariing level %d >= %d\n", level, l.level) + if level >= l.level { + logger.Printf(prolog, rest...) + } + return l.Stop() +} diff --git a/src/fluence/utils/utils.go b/src/fluence/utils/utils.go index f81f81c..e429056 100644 --- a/src/fluence/utils/utils.go +++ b/src/fluence/utils/utils.go @@ -4,6 +4,8 @@ import ( "context" "fmt" + klog "k8s.io/klog/v2" + "encoding/json" "github.com/flux-framework/flux-k8s/flux-plugin/fluence/jgf" @@ -20,7 +22,56 @@ var ( controlPlaneLabel = "node-role.kubernetes.io/control-plane" ) +// RegisterExisting uses the in cluster API to get existing pods +// This is actually the same as computeTotalRequests but I wanted to compare the two +// It is currently not being used. The main difference is that below, we are essentially +// rounding the cpu to the smaller unit (logically for the graph) but losing some +// granularity, if we think "milli" values have feet. 
+func RegisterExisting(clientset *kubernetes.Clientset, ctx context.Context) (map[string]PodSpec, error) { + + // We are using PodSpec as a holder for a *summary* of cpu/memory being used + // by the node, it is a summation across pods we find on each one + nodes := map[string]PodSpec{} + + // get pods in all the namespaces by omitting namespace + // Or specify namespace to get pods in particular namespace + pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{}) + if err != nil { + klog.Infof("Error listing pods: %s\n", err) + return nodes, err + } + klog.Infof("Found %d existing pods in the cluster\n", len(pods.Items)) + + // Create a new PodSpec for each + for _, pod := range pods.Items { + + // Add the node to our lookup if we don't have it yet + _, ok := nodes[pod.Spec.NodeName] + if !ok { + nodes[pod.Spec.NodeName] = PodSpec{} + } + ps := nodes[pod.Spec.NodeName] + + for _, container := range pod.Spec.Containers { + specRequests := container.Resources.Requests + ps.Cpu += int32(specRequests.Cpu().Value()) + ps.Memory += specRequests.Memory().Value() + ps.Storage += specRequests.StorageEphemeral().Value() + + specLimits := container.Resources.Limits + gpuSpec := specLimits["nvidia.com/gpu"] + ps.Gpu += gpuSpec.Value() + } + nodes[pod.Spec.NodeName] = ps + } + return nodes, nil +} + // CreateJGF creates the Json Graph Format +// We currently don't have support in fluxion to allocate jobs for existing pods, +// so instead we create the graph with fewer resources. When that support is +// added (see sig-scheduler-plugins/pkg/fluence/register.go) we can +// remove the adjustment here, which is more of a hack func CreateJGF(filename string, skipLabel *string) error { ctx := context.Background() config, err := rest.InClusterConfig() @@ -28,16 +79,19 @@ func CreateJGF(filename string, skipLabel *string) error { fmt.Println("Error getting InClusterConfig") return err } - // creates the clientset clientset, err := kubernetes.NewForConfig(config) if err != nil { - fmt.Println("Error getting ClientSet") + fmt.Printf("Error getting ClientSet: %s", err) return err } nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + fmt.Printf("Error listing nodes: %s", err) + return err + } - var fluxgraph jgf.Fluxjgf - fluxgraph = jgf.InitJGF() + // Create a Flux Json Graph Format (JGF) with all cluster nodes + fluxgraph := jgf.InitJGF() // TODO it looks like we can add more to the graph here - // let's remember to consider what else we can. 
@@ -53,11 +107,11 @@ func CreateJGF(filename string, skipLabel *string) error { vcores := 0 fmt.Println("Number nodes ", len(nodes.Items)) - var totalAllocCpu, totalmem int64 + var totalAllocCpu int64 totalAllocCpu = 0 sdnCount := 0 - for node_index, node := range nodes.Items { + for nodeIndex, node := range nodes.Items { // We should not be scheduling to the control plane _, ok := node.Labels[controlPlaneLabel] @@ -71,107 +125,121 @@ func CreateJGF(filename string, skipLabel *string) error { if *skipLabel != "" { _, ok := node.Labels[*skipLabel] if ok { - fmt.Println("Skipping node ", node.GetName()) + fmt.Printf("Skipping node %s\n", node.GetName()) continue } } - fmt.Println("node in flux group ", node.GetName()) - if !node.Spec.Unschedulable { - fieldselector, err := fields.ParseSelector("spec.nodeName=" + node.GetName() + ",status.phase!=" + string(corev1.PodSucceeded) + ",status.phase!=" + string(corev1.PodFailed)) - if err != nil { - return err - } - pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{ - FieldSelector: fieldselector.String(), - }) - if err != nil { - return err - } + if node.Spec.Unschedulable { + fmt.Printf("Skipping node %s, unschedulable\n", node.GetName()) + continue + } - // fmt.Println("Node ", node.GetName(), " has pods ", pods) - // Check if subnet already exists - // Here we build subnets according to topology.kubernetes.io/zone label - subnetName := node.Labels["topology.kubernetes.io/zone"] - subnet := fluxgraph.MakeSubnet(sdnCount, subnetName) - sdnCount = sdnCount + 1 - fluxgraph.MakeEdge(cluster, subnet, "contains") - fluxgraph.MakeEdge(subnet, cluster, "in") - - reqs := computeTotalRequests(pods) - cpuReqs := reqs[corev1.ResourceCPU] - memReqs := reqs[corev1.ResourceMemory] - - avail := node.Status.Allocatable.Cpu().MilliValue() - totalcpu := int64((avail - cpuReqs.MilliValue()) / 1000) //- 1 - fmt.Println("Node ", node.GetName(), " flux cpu ", totalcpu) - totalAllocCpu = totalAllocCpu + totalcpu - totalmem = node.Status.Allocatable.Memory().Value() - memReqs.Value() - fmt.Println("Node ", node.GetName(), " total mem ", totalmem) - gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable["nvidia.com/gpu"] - - // reslist := node.Status.Allocatable - // resources := make([]corev1.ResourceName, 0, len(reslist)) - // for resource := range reslist { - // fmt.Println("resource ", resource) - // resources = append(resources, resource) - // } - // for _, resource := range resources { - // value := reslist[resource] - - // fmt.Printf(" %s:\t%s\n", resource, value.String()) - // } - - workernode := fluxgraph.MakeNode(node_index, false, node.Name) - fluxgraph.MakeEdge(subnet, workernode, "contains") // this is rack otherwise - fluxgraph.MakeEdge(workernode, subnet, "in") // this is rack otherwise - - // socket := fluxgraph.MakeSocket(0, "socket") - // fluxgraph.MakeEdge(workernode, socket, "contains") - // fluxgraph.MakeEdge(socket, workernode, "in") - - if hasGpuAllocatable { - fmt.Println("GPU Resource quantity ", gpuAllocatable.Value()) - //MakeGPU(index int, name string, size int) string { - for index := 0; index < int(gpuAllocatable.Value()); index++ { - gpu := fluxgraph.MakeGPU(index, "nvidiagpu", 1) - fluxgraph.MakeEdge(workernode, gpu, "contains") // workernode was socket - fluxgraph.MakeEdge(gpu, workernode, "in") - } + fieldselector, err := fields.ParseSelector("spec.nodeName=" + node.GetName() + ",status.phase!=" + string(corev1.PodSucceeded) + ",status.phase!=" + string(corev1.PodFailed)) + if err != nil { + return err + } + 
pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{ + FieldSelector: fieldselector.String(), + }) + if err != nil { + return err + } + // Check if subnet already exists + // Here we build subnets according to topology.kubernetes.io/zone label + subnetName := node.Labels["topology.kubernetes.io/zone"] + subnet := fluxgraph.MakeSubnet(sdnCount, subnetName) + sdnCount = sdnCount + 1 + fluxgraph.MakeEdge(cluster, subnet, "contains") + fluxgraph.MakeEdge(subnet, cluster, "in") + + // These are requests for existing pods, for cpu and memory + reqs := computeTotalRequests(pods) + cpuReqs := reqs[corev1.ResourceCPU] + memReqs := reqs[corev1.ResourceMemory] + + // Actual values that we have available (minus requests) + totalCpu := node.Status.Allocatable.Cpu().MilliValue() + totalMem := node.Status.Allocatable.Memory().Value() + + // Values accounting for requests + availCpu := int64((totalCpu - cpuReqs.MilliValue()) / 1000) + availMem := totalMem - memReqs.Value() + + // Show existing to compare to + fmt.Printf("\n📦️ %s\n", node.GetName()) + fmt.Printf(" allocated cpu: %d\n", cpuReqs.Value()) + fmt.Printf(" allocated mem: %d\n", memReqs.Value()) + fmt.Printf(" available cpu: %d\n", availCpu) + fmt.Printf(" running pods: %d\n", len(pods.Items)) + + // keep track of overall total + totalAllocCpu += availCpu + fmt.Printf(" available mem: %d\n", availMem) + gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable["nvidia.com/gpu"] + + // reslist := node.Status.Allocatable + // resources := make([]corev1.ResourceName, 0, len(reslist)) + // for resource := range reslist { + // fmt.Println("resource ", resource) + // resources = append(resources, resource) + // } + // for _, resource := range resources { + // value := reslist[resource] + + // fmt.Printf(" %s:\t%s\n", resource, value.String()) + // } + + workernode := fluxgraph.MakeNode(nodeIndex, false, node.Name) + fluxgraph.MakeEdge(subnet, workernode, "contains") // this is rack otherwise + fluxgraph.MakeEdge(workernode, subnet, "in") // this is rack otherwise + + // socket := fluxgraph.MakeSocket(0, "socket") + // fluxgraph.MakeEdge(workernode, socket, "contains") + // fluxgraph.MakeEdge(socket, workernode, "in") + + if hasGpuAllocatable { + fmt.Println("GPU Resource quantity ", gpuAllocatable.Value()) + //MakeGPU(index int, name string, size int) string { + for index := 0; index < int(gpuAllocatable.Value()); index++ { + gpu := fluxgraph.MakeGPU(index, "nvidiagpu", 1) + fluxgraph.MakeEdge(workernode, gpu, "contains") // workernode was socket + fluxgraph.MakeEdge(gpu, workernode, "in") } - for index := 0; index < int(totalcpu); index++ { - // MakeCore(index int, name string) - core := fluxgraph.MakeCore(index, "core") - fluxgraph.MakeEdge(workernode, core, "contains") // workernode was socket - fluxgraph.MakeEdge(core, workernode, "in") - - // Question from Vanessa: - // How can we get here and have vcores ever not equal to zero? 
- if vcores == 0 { - fluxgraph.MakeNFDProperties(core, index, "cpu-", &node.Labels) - // fluxgraph.MakeNFDProperties(core, index, "netmark-", &node.Labels) - } else { - for vc := 0; vc < vcores; vc++ { - vcore := fluxgraph.MakeVCore(core, vc, "vcore") - fluxgraph.MakeNFDProperties(vcore, index, "cpu-", &node.Labels) - } + } + + for index := 0; index < int(availCpu); index++ { + // MakeCore(index int, name string) + core := fluxgraph.MakeCore(index, "core") + fluxgraph.MakeEdge(workernode, core, "contains") // workernode was socket + fluxgraph.MakeEdge(core, workernode, "in") + + // Question from Vanessa: + // How can we get here and have vcores ever not equal to zero? + if vcores == 0 { + fluxgraph.MakeNFDProperties(core, index, "cpu-", &node.Labels) + // fluxgraph.MakeNFDProperties(core, index, "netmark-", &node.Labels) + } else { + for vc := 0; vc < vcores; vc++ { + vcore := fluxgraph.MakeVCore(core, vc, "vcore") + fluxgraph.MakeNFDProperties(vcore, index, "cpu-", &node.Labels) } } + } - // MakeMemory(index int, name string, unit string, size int) - fractionmem := totalmem >> 30 - // fractionmem := (totalmem/totalcpu) >> 20 - // fmt.Println("Creating ", fractionmem, " vertices with ", 1<<10, " MB of mem") - for i := 0; i < /*int(totalcpu)*/ int(fractionmem); i++ { - mem := fluxgraph.MakeMemory(i, "memory", "MB", int(1<<10)) - fluxgraph.MakeEdge(workernode, mem, "contains") - fluxgraph.MakeEdge(mem, workernode, "in") - } + // MakeMemory(index int, name string, unit string, size int) + fractionMem := availMem >> 30 + // fractionmem := (totalmem/totalcpu) >> 20 + // fmt.Println("Creating ", fractionmem, " vertices with ", 1<<10, " MB of mem") + for i := 0; i < /*int(totalcpu)*/ int(fractionMem); i++ { + mem := fluxgraph.MakeMemory(i, "memory", "MB", int(1<<10)) + fluxgraph.MakeEdge(workernode, mem, "contains") + fluxgraph.MakeEdge(mem, workernode, "in") } } - fmt.Println("Can request at most ", totalAllocCpu, " exclusive cpu") + fmt.Printf("\nCan request at most %d exclusive cpu", totalAllocCpu) err = fluxgraph.WriteJGF(filename) if err != nil { return err From 8c99f108f80f2a6fcbf83dbab8e273e225cb6073 Mon Sep 17 00:00:00 2001 From: vsoch Date: Thu, 4 Apr 2024 19:40:15 -0600 Subject: [PATCH 24/28] test: only allow scheduling first pod Problem: we currently allow any pod in the group to make the request Solution: Making a BIG assumption that might be wrong, I am adding logic that only allows scheduling (meaning going through PreFilter with AskFlux) given that we see the first pod in the listing. In practice this is the first index (e.g., index 0) which based on our sorting strategy (timestamp then name) I think might work. But I am not 100% on that. The reason we want to do that is so the nodes are chosen for the first pod, and then the group can quickly follow and be actually assigned. Before I did this I kept seeing huge delays in waiting for the queue to move (e.g., 5/6 pods Running and the last one waiting, and then kicking in much later like an old car) and I think with this tweak that is fixed. But this is my subjective evaluation. I am also adding in the hack script for deploying to gke, which requires a push instead of a kind load. 
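The gating logic described here boils down to: list the group's pods, sort them the same way the queue sorts them (creation timestamp, then name), and only let the pod at index 0 proceed to AskFlux. Below is a minimal, dependency-free sketch of that check under those assumptions; podInfo and firstInGroup are illustrative names, not the plugin's actual types, and the real code works on corev1.Pod objects from the pod lister.

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

type podInfo struct {
	Name    string
	Created time.Time
}

// firstInGroup sorts the group listing by (creation timestamp, name) and
// reports whether the named pod sits at index 0, the only pod this patch
// allows to reach AskFlux.
func firstInGroup(pods []podInfo, name string) (bool, int) {
	sort.Slice(pods, func(i, j int) bool {
		if !pods[i].Created.Equal(pods[j].Created) {
			return pods[i].Created.Before(pods[j].Created)
		}
		return pods[i].Name < pods[j].Name
	})
	for i, p := range pods {
		if p.Name == name {
			return i == 0, i
		}
	}
	return false, -1
}

func main() {
	now := time.Now()
	pods := []podInfo{
		{Name: "job-0-1", Created: now},
		{Name: "job-0-0", Created: now},
		{Name: "job-0-2", Created: now.Add(time.Second)},
	}
	first, index := firstInGroup(pods, "job-0-0")
	fmt.Printf("first=%v index=%d\n", first, index) // first=true index=0
}
```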
Signed-off-by: vsoch --- README.md | 1 - hack/quick-build-gke.sh | 33 ++++++++++++++++ hack/quick-build.sh | 2 +- .../pkg/fluence/core/core.go | 39 ++++++++++++++++--- sig-scheduler-plugins/pkg/logger/logger.go | 3 +- 5 files changed, 68 insertions(+), 10 deletions(-) create mode 100755 hack/quick-build-gke.sh diff --git a/README.md b/README.md index e3e1214..ff3327e 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ Fluence enables HPC-grade pod scheduling in Kubernetes via the [Kubernetes Sched ## TODO -- On init, need to load in resource graph that accounts for running stuff - Need to allow for restart / crashes and looking up existing jobid, updating maps in PodGroup - Since AskFlux is done on level of pod group, refactor function to account for specific resources of all pods (not just one pod) - Figure out if EventsToRegister replaces old informer diff --git a/hack/quick-build-gke.sh b/hack/quick-build-gke.sh new file mode 100755 index 0000000..875360a --- /dev/null +++ b/hack/quick-build-gke.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Before running this, you should: +# 1. create the kind cluster (needs more than one node, fluence does not scheduler to the control plane) +# 2. Install cert-manager +# 3. Customize the script to point to your registry if you intend to push + +REGISTRY="${1:-ghcr.io/vsoch}" +HERE=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$(dirname ${HERE}) + +# Go to the script directory +cd ${ROOT} + +# These build each of the images. The sidecar is separate from the other two in src/ +make REGISTRY=${REGISTRY} SCHEDULER_IMAGE=fluence SIDECAR_IMAGE=fluence-sidecar CONTROLLER_IMAGE=fluence-controller + +# This is what it might look like to push +# docker push ghcr.io/vsoch/fluence-sidecar && docker push ghcr.io/vsoch/fluence-controller && docker push ghcr.io/vsoch/fluence:latest + +# We load into kind so we don't need to push/pull and use up internet data ;) +docker push ${REGISTRY}/fluence-sidecar:latest +docker push ${REGISTRY}/fluence-controller:latest +docker push ${REGISTRY}/fluence:latest + +# And then install using the charts. The pull policy ensures we use the loaded ones +cd ${ROOT}/upstream/manifests/install/charts +helm uninstall fluence || true +helm install \ + --set scheduler.image=${REGISTRY}/fluence:latest \ + --set controller.image=${REGISTRY}/fluence-controller:latest \ + --set scheduler.sidecarimage=${REGISTRY}/fluence-sidecar:latest \ + fluence as-a-second-scheduler/ diff --git a/hack/quick-build.sh b/hack/quick-build.sh index b3ccefe..23a5c87 100755 --- a/hack/quick-build.sh +++ b/hack/quick-build.sh @@ -33,4 +33,4 @@ helm install \ --set controller.pullPolicy=Never \ --set controller.image=${REGISTRY}/fluence-controller:latest \ --set scheduler.sidecarimage=${REGISTRY}/fluence-sidecar:latest \ - fluence as-a-second-scheduler/ \ No newline at end of file + fluence as-a-second-scheduler/ diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index 8b08468..1e75814 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -71,7 +71,7 @@ type PodGroupManager struct { scheduleTimeout *time.Duration // permittedPG stores the podgroup name which has passed the pre resource check. permittedPG *gochache.Cache - // backedOffPG stores the podgorup name which failed scheudling recently. + // backedOffPG stores the podgorup name which failed scheduling recently. 
backedOffPG *gochache.Cache // podLister is pod lister podLister listerv1.PodLister @@ -111,12 +111,25 @@ func NewPodGroupManager( } // GetStatuses string (of all pods) to show for debugging purposes -func (pgMgr *PodGroupManager) GetStatuses(pods []*corev1.Pod) string { +// Since we loop here, we also determine if the first pod is the one +// we are considering +func (pgMgr *PodGroupManager) GetStatusesAndIndex( + pods []*corev1.Pod, + pod *corev1.Pod, +) (string, bool, int) { statuses := "" - for _, pod := range pods { - statuses += " " + fmt.Sprintf("%s", pod.Status.Phase) + + // We need to distinguish 0 from the default and not finding anything + foundIndex := false + index := 0 + for i, p := range pods { + if p.Name == pod.Name { + foundIndex = true + index = i + } + statuses += " " + fmt.Sprintf("%s", p.Status.Phase) } - return statuses + return statuses, foundIndex, index } // GetPodNode is a quick lookup to see if we have a node @@ -153,8 +166,10 @@ func (pgMgr *PodGroupManager) PreFilter( return fmt.Errorf("podLister list pods failed: %w", err) } + // Only allow scheduling the first in the group so the others come after + // Get statuses to show for debugging - statuses := pgMgr.GetStatuses(pods) + statuses, found, idx := pgMgr.GetStatusesAndIndex(pods, pod) // This shows us the number of pods we have in the set and their states pgMgr.log.Info("[PodGroup PreFilter] group: %s pods: %s MinMember: %d Size: %d", pgFullName, statuses, pg.Spec.MinMember, len(pods)) @@ -163,6 +178,18 @@ func (pgMgr *PodGroupManager) PreFilter( "current pods number: %v, minMember of group: %v", pod.Name, len(pods), pg.Spec.MinMember) } + if !found { + return fmt.Errorf("pod %s was not found in group - this should not happen", pod.Name) + } + + // We only will AskFlux for the first pod + // This makes an assumption that the order listed is the order in the queue, I'm not + // sure that is true in practice. This is the one case with retry. This design + // probably needs thinking and work. + if idx != 0 { + return fmt.Errorf("pod %s is not first in the list, will wait to schedule", pod.Name) + } + // TODO we likely can take advantage of these resources or other custom // attributes we add. For now ignore and calculate based on pod needs (above) // if pg.Spec.MinResources == nil { diff --git a/sig-scheduler-plugins/pkg/logger/logger.go b/sig-scheduler-plugins/pkg/logger/logger.go index 522be61..053021a 100644 --- a/sig-scheduler-plugins/pkg/logger/logger.go +++ b/sig-scheduler-plugins/pkg/logger/logger.go @@ -19,7 +19,6 @@ const ( LevelDebug ) -// TODO try saving state here when we can close type DebugLogger struct { level int Filename string @@ -28,7 +27,7 @@ type DebugLogger struct { func NewDebugLogger(level int, filename string) *DebugLogger { return &DebugLogger{ - level: LevelNone, + level: level, Filename: filename, } } From d8e67fa6520d3b4a107e6bd36f7a2ebc34457f05 Mon Sep 17 00:00:00 2001 From: vsoch Date: Wed, 17 Apr 2024 00:54:02 -0600 Subject: [PATCH 25/28] test: adding permit to allow for sibling pod scheduling Problem: the submit of the first index works for more controlled lengths (e.g., lammps takes a while) but was having issues with really quick jobs. Solution: try restoring the queue that allows for enabling siblings pods so any group can be scheduled. 
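The restored queue behavior hinges on a simple quorum rule in Permit: count the group's pods that already have nodes and, because the pod being scheduled is not yet in the snapshot, compare assigned+1 against the group's MinMember; below the threshold the pod waits, at or above it the waiting siblings are released. A stripped-down sketch of just that decision follows; permit and Decision are illustrative names, not the plugin API.

```go
package main

import "fmt"

type Decision string

const (
	Allow Decision = "allow" // quorum reached: release the waiting siblings
	Wait  Decision = "wait"  // hold this pod until enough siblings are assigned
)

// permit mirrors the rule in the diff: the pod currently being scheduled is
// not counted in the snapshot, so assigned+1 is compared against MinMember.
func permit(assigned, minMember int) Decision {
	if assigned+1 >= minMember {
		return Allow
	}
	return Wait
}

func main() {
	fmt.Println(permit(0, 4)) // wait: the first pod of the group holds at Permit
	fmt.Println(permit(3, 4)) // allow: quorum reached, the group is released
}
```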
Signed-off-by: vsoch --- examples/pod-group-jobs/job1.yaml | 59 +++++++ examples/pod-group-jobs/job2.yaml | 59 +++++++ .../pkg/fluence/core/core.go | 161 +++++++++++++++--- sig-scheduler-plugins/pkg/fluence/fluence.go | 139 ++++++++++++++- .../pkg/fluence/group/group.go | 18 ++ sig-scheduler-plugins/pkg/logger/logger.go | 4 +- 6 files changed, 406 insertions(+), 34 deletions(-) create mode 100644 examples/pod-group-jobs/job1.yaml create mode 100644 examples/pod-group-jobs/job2.yaml diff --git a/examples/pod-group-jobs/job1.yaml b/examples/pod-group-jobs/job1.yaml new file mode 100644 index 0000000..e0ebba0 --- /dev/null +++ b/examples/pod-group-jobs/job1.yaml @@ -0,0 +1,59 @@ +apiVersion: v1 +kind: Service +metadata: + name: s0 +spec: + clusterIP: None + selector: + job-name: job-0 +--- +apiVersion: batch/v1 +kind: Job +metadata: + # name will be derived based on iteration + name: job-0 +spec: + completions: 4 + parallelism: 4 + completionMode: Indexed + template: + metadata: + labels: + app: job-0 + spec: + subdomain: s0 + schedulerName: fluence + restartPolicy: Never + containers: + - name: example-workload + image: bash:latest + resources: + limits: + cpu: "3" + requests: + cpu: "3" + command: + - bash + - -c + - | + if [ $JOB_COMPLETION_INDEX -ne "0" ] + then + sleep infinity + fi + echo "START: $(date +%s)" + for i in 0 1 2 3 + do + gotStatus="-1" + wantStatus="0" + while [ $gotStatus -ne $wantStatus ] + do + ping -c 1 job-0-${i}.s0 > /dev/null 2>&1 + gotStatus=$? + if [ $gotStatus -ne $wantStatus ]; then + echo "Failed to ping pod job-0-${i}.s0, retrying in 1 second..." + sleep 1 + fi + done + echo "Successfully pinged pod: job-0-${i}.s0" + done + echo "DONE: $(date +%s)" \ No newline at end of file diff --git a/examples/pod-group-jobs/job2.yaml b/examples/pod-group-jobs/job2.yaml new file mode 100644 index 0000000..c39820b --- /dev/null +++ b/examples/pod-group-jobs/job2.yaml @@ -0,0 +1,59 @@ +apiVersion: v1 +kind: Service +metadata: + name: s1 +spec: + clusterIP: None + selector: + job-name: job-1 +--- +apiVersion: batch/v1 +kind: Job +metadata: + # name will be derived based on iteration + name: job-1 +spec: + completions: 4 + parallelism: 4 + completionMode: Indexed + template: + metadata: + labels: + app: job-1 + spec: + subdomain: s1 + schedulerName: fluence + restartPolicy: Never + containers: + - name: example-workload + image: bash:latest + resources: + limits: + cpu: "3" + requests: + cpu: "3" + command: + - bash + - -c + - | + if [ $JOB_COMPLETION_INDEX -ne "0" ] + then + sleep infinity + fi + echo "START: $(date +%s)" + for i in 0 1 2 3 + do + gotStatus="-1" + wantStatus="0" + while [ $gotStatus -ne $wantStatus ] + do + ping -c 1 job-0-${i}.s1 > /dev/null 2>&1 + gotStatus=$? + if [ $gotStatus -ne $wantStatus ]; then + echo "Failed to ping pod job-0-${i}.s1, retrying in 1 second..." + sleep 1 + fi + done + echo "Successfully pinged pod: job-0-${i}.s1" + done + echo "DONE: $(date +%s)" \ No newline at end of file diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index 1e75814..ea300ce 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -39,11 +39,33 @@ import ( "sigs.k8s.io/scheduler-plugins/pkg/util" ) +type Status string + +const ( + // PodGroupNotSpecified denotes no PodGroup is specified in the Pod spec. 
+ PodGroupNotSpecified Status = "PodGroup not specified" + // PodGroupNotFound denotes the specified PodGroup in the Pod spec is + // not found in API server. + PodGroupNotFound Status = "PodGroup not found" + Success Status = "Success" + Wait Status = "Wait" + + permitStateKey = "PermitFluence" +) + // TODO should eventually store group name here to reassociate on reload type FluxStateData struct { NodeName string } +type PermitState struct { + Activate bool +} + +func (s *PermitState) Clone() framework.StateData { + return &PermitState{Activate: s.Activate} +} + func (s *FluxStateData) Clone() framework.StateData { clone := &FluxStateData{ NodeName: s.NodeName, @@ -58,6 +80,10 @@ type Manager interface { GetPodGroup(context.Context, *corev1.Pod) (string, *v1alpha1.PodGroup) GetCreationTimestamp(*corev1.Pod, time.Time) time.Time DeletePermittedPodGroup(string) + Permit(context.Context, *framework.CycleState, *corev1.Pod) Status + CalculateAssignedPods(string, string) int + ActivateSiblings(pod *corev1.Pod, state *framework.CycleState) + BackoffPodGroup(string, time.Duration) } // PodGroupManager defines the scheduling operation called @@ -110,26 +136,69 @@ func NewPodGroupManager( return pgMgr } +func (pgMgr *PodGroupManager) BackoffPodGroup(pgName string, backoff time.Duration) { + if backoff == time.Duration(0) { + return + } + pgMgr.backedOffPG.Add(pgName, nil, backoff) +} + +// ActivateSiblings stashes the pods belonging to the same PodGroup of the given pod +// in the given state, with a reserved key "kubernetes.io/pods-to-activate". +func (pgMgr *PodGroupManager) ActivateSiblings(pod *corev1.Pod, state *framework.CycleState) { + pgName := util.GetPodGroupLabel(pod) + if pgName == "" { + return + } + + // Only proceed if it's explicitly requested to activate sibling pods. + if c, err := state.Read(permitStateKey); err != nil { + return + } else if s, ok := c.(*PermitState); !ok || !s.Activate { + return + } + + pods, err := pgMgr.podLister.Pods(pod.Namespace).List( + labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: pgName}), + ) + if err != nil { + klog.ErrorS(err, "Failed to obtain pods belong to a PodGroup", "podGroup", pgName) + return + } + + for i := range pods { + if pods[i].UID == pod.UID { + pods = append(pods[:i], pods[i+1:]...) + break + } + } + + if len(pods) != 0 { + if c, err := state.Read(framework.PodsToActivateKey); err == nil { + if s, ok := c.(*framework.PodsToActivate); ok { + s.Lock() + for _, pod := range pods { + namespacedName := GetNamespacedName(pod) + s.Map[namespacedName] = pod + } + s.Unlock() + } + } + } +} + // GetStatuses string (of all pods) to show for debugging purposes -// Since we loop here, we also determine if the first pod is the one -// we are considering -func (pgMgr *PodGroupManager) GetStatusesAndIndex( +func (pgMgr *PodGroupManager) GetStatuses( pods []*corev1.Pod, pod *corev1.Pod, -) (string, bool, int) { +) string { statuses := "" // We need to distinguish 0 from the default and not finding anything - foundIndex := false - index := 0 - for i, p := range pods { - if p.Name == pod.Name { - foundIndex = true - index = i - } + for _, p := range pods { statuses += " " + fmt.Sprintf("%s", p.Status.Phase) } - return statuses, foundIndex, index + return statuses } // GetPodNode is a quick lookup to see if we have a node @@ -138,6 +207,39 @@ func (pgMgr *PodGroupManager) GetPodNode(pod *corev1.Pod) string { return node } +// Permit permits a pod to run, if the minMember match, it would send a signal to chan. 
+func (pgMgr *PodGroupManager) Permit(ctx context.Context, state *framework.CycleState, pod *corev1.Pod) Status { + pgFullName, pg := pgMgr.GetPodGroup(ctx, pod) + if pgFullName == "" { + return PodGroupNotSpecified + } + if pg == nil { + // A Pod with a podGroup name but without a PodGroup found is denied. + return PodGroupNotFound + } + + assigned := pgMgr.CalculateAssignedPods(pg.Name, pg.Namespace) + // The number of pods that have been assigned nodes is calculated from the snapshot. + // The current pod in not included in the snapshot during the current scheduling cycle. + if int32(assigned)+1 >= pg.Spec.MinMember { + return Success + } + + if assigned == 0 { + // Given we've reached Permit(), it's mean all PreFilter checks (minMember & minResource) + // already pass through, so if assigned == 0, it could be due to: + // - minResource get satisfied + // - new pods added + // In either case, we should and only should use this 0-th pod to trigger activating + // its siblings. + // It'd be in-efficient if we trigger activating siblings unconditionally. + // See https://github.com/kubernetes-sigs/scheduler-plugins/issues/682 + state.Write(permitStateKey, &PermitState{Activate: true}) + } + + return Wait +} + // PreFilter filters out a pod if // 1. it belongs to a podgroup that was recently denied or // 2. the total number of pods in the podgroup is less than the minimum number of pods @@ -169,7 +271,7 @@ func (pgMgr *PodGroupManager) PreFilter( // Only allow scheduling the first in the group so the others come after // Get statuses to show for debugging - statuses, found, idx := pgMgr.GetStatusesAndIndex(pods, pod) + statuses := pgMgr.GetStatuses(pods, pod) // This shows us the number of pods we have in the set and their states pgMgr.log.Info("[PodGroup PreFilter] group: %s pods: %s MinMember: %d Size: %d", pgFullName, statuses, pg.Spec.MinMember, len(pods)) @@ -178,18 +280,6 @@ func (pgMgr *PodGroupManager) PreFilter( "current pods number: %v, minMember of group: %v", pod.Name, len(pods), pg.Spec.MinMember) } - if !found { - return fmt.Errorf("pod %s was not found in group - this should not happen", pod.Name) - } - - // We only will AskFlux for the first pod - // This makes an assumption that the order listed is the order in the queue, I'm not - // sure that is true in practice. This is the one case with retry. This design - // probably needs thinking and work. - if idx != 0 { - return fmt.Errorf("pod %s is not first in the list, will wait to schedule", pod.Name) - } - // TODO we likely can take advantage of these resources or other custom // attributes we add. For now ignore and calculate based on pod needs (above) // if pg.Spec.MinResources == nil { @@ -233,7 +323,9 @@ func (pgMgr *PodGroupManager) PreFilter( stateData := FluxStateData{NodeName: node} state.Write(framework.StateKey(pod.Name), &stateData) // Also save to the podToNode lookup + pgMgr.mutex.Lock() pgMgr.podToNode[pod.Name] = node + pgMgr.mutex.Unlock() } pgMgr.permittedPG.Add(pgFullName, pgFullName, *pgMgr.scheduleTimeout) return nil @@ -252,6 +344,25 @@ func (pgMgr *PodGroupManager) GetCreationTimestamp(pod *corev1.Pod, ts time.Time return pg.CreationTimestamp.Time } +// CalculateAssignedPods returns the number of pods that has been assigned nodes: assumed or bound. 
+func (pgMgr *PodGroupManager) CalculateAssignedPods(podGroupName, namespace string) int { + nodeInfos, err := pgMgr.snapshotSharedLister.NodeInfos().List() + if err != nil { + pgMgr.log.Error("Cannot get nodeInfos from frameworkHandle: %s", err) + return 0 + } + var count int + for _, nodeInfo := range nodeInfos { + for _, podInfo := range nodeInfo.Pods { + pod := podInfo.Pod + if util.GetPodGroupLabel(pod) == podGroupName && pod.Namespace == namespace && pod.Spec.NodeName != "" { + count++ + } + } + } + return count +} + // DeletePermittedPodGroup deletes a podGroup that passes Pre-Filter but reaches PostFilter. func (pgMgr *PodGroupManager) DeletePermittedPodGroup(pgFullName string) { pgMgr.permittedPG.Delete(pgFullName) diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 84f3e95..099d2f3 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -22,8 +22,8 @@ import ( "sync" "time" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" - klog "k8s.io/klog/v2" "sigs.k8s.io/scheduler-plugins/pkg/logger" @@ -33,12 +33,12 @@ import ( "k8s.io/client-go/tools/cache" fgroup "sigs.k8s.io/scheduler-plugins/pkg/fluence/group" + flabel "sigs.k8s.io/scheduler-plugins/pkg/fluence/labels" corev1helpers "k8s.io/component-helpers/scheduling/corev1" "k8s.io/kubernetes/pkg/scheduler/framework" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/scheduler-plugins/apis/config" "sigs.k8s.io/scheduler-plugins/apis/scheduling" "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" fcore "sigs.k8s.io/scheduler-plugins/pkg/fluence/core" @@ -52,6 +52,7 @@ type Fluence struct { frameworkHandler framework.Handle pgMgr fcore.Manager scheduleTimeout *time.Duration + pgBackoff *time.Duration log *logger.DebugLogger } @@ -59,6 +60,15 @@ var ( _ framework.QueueSortPlugin = &Fluence{} _ framework.PreFilterPlugin = &Fluence{} _ framework.FilterPlugin = &Fluence{} + + _ framework.PostFilterPlugin = &Fluence{} + _ framework.PermitPlugin = &Fluence{} + _ framework.ReservePlugin = &Fluence{} + + _ framework.EnqueueExtensions = &Fluence{} + + permitWaitingTimeSeconds int64 = 60 + podGroupBackoffSeconds int64 = 0 ) const ( @@ -69,14 +79,12 @@ const ( // Initialize and return a new Fluence Custom Scheduler Plugin func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) { - // Keep these empty for now, use defaults - args := config.CoschedulingArgs{} ctx := context.TODO() // Make fluence his own little logger! // This can eventually be a flag, but just going to set for now // It shall be a very chonky file. Oh lawd he comin! - l := logger.NewDebugLogger(logger.LevelError, "/tmp/fluence.log") + l := logger.NewDebugLogger(logger.LevelDebug, "/tmp/fluence.log") scheme := runtime.NewScheme() _ = clientscheme.AddToScheme(scheme) @@ -93,7 +101,7 @@ func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) fluxPodsInformer.AddIndexers(cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) // PermitWaitingTimeSeconds is the waiting timeout in seconds. 
- scheduleTimeDuration := time.Duration(args.PermitWaitingTimeSeconds) * time.Second + scheduleTimeDuration := time.Duration(permitWaitingTimeSeconds) * time.Second pgMgr := fcore.NewPodGroupManager( client, handle.SnapshotSharedLister(), @@ -110,11 +118,13 @@ func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) }) go fluxPodsInformer.Run(ctx.Done()) + backoffSeconds := time.Duration(podGroupBackoffSeconds) * time.Second plugin := &Fluence{ frameworkHandler: handle, pgMgr: pgMgr, scheduleTimeout: &scheduleTimeDuration, log: l, + pgBackoff: &backoffSeconds, } // TODO this is not supported yet @@ -219,16 +229,131 @@ func (f *Fluence) PreFilter( node := f.pgMgr.GetPodNode(pod) f.mutex.Unlock() if node != "" { + f.log.Info("[Fluence PreFilter] assigned pod %s to node %s\n", pod.Name, node) result := framework.PreFilterResult{NodeNames: sets.New(node)} return &result, framework.NewStatus(framework.Success, "") } + f.log.Info("[Fluence PreFilter] pod %s does not have a node assigned\n", pod.Name) + // This will populate the node name into the pod group manager err := f.pgMgr.PreFilter(ctx, pod, state) if err != nil { - f.log.Error("[Fluence PreFilter] failed pod %s: %s", klog.KObj(pod), err.Error()) + f.log.Error("[Fluence PreFilter] failed pod %s: %s", pod.Name, err.Error()) return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error()) } node = f.pgMgr.GetPodNode(pod) result := framework.PreFilterResult{NodeNames: sets.New(node)} return &result, framework.NewStatus(framework.Success, "") } + +// PostFilter is used to reject a group of pods if a pod does not pass PreFilter or Filter. +func (f *Fluence) PostFilter( + ctx context.Context, + state *framework.CycleState, + pod *corev1.Pod, + filteredNodeStatusMap framework.NodeToStatusMap, +) (*framework.PostFilterResult, *framework.Status) { + + pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) + if pg == nil { + f.log.Info("Pod does not belong to any group, pod %s", pod.Name) + return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, "can not find pod group") + } + + // This explicitly checks nodes, and we can skip scheduling another pod if we already + // have the minimum. For fluence since we expect an exact size this likely is not needed + assigned := f.pgMgr.CalculateAssignedPods(pg.Name, pod.Namespace) + if assigned >= int(pg.Spec.MinMember) { + f.log.Info("Assigned pods podGroup %s is assigned %s", pgName, assigned) + return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable) + } + + // Took out percentage chcek here, doesn't make sense to me. + + // It's based on an implicit assumption: if the nth Pod failed, + // it's inferrable other Pods belonging to the same PodGroup would be very likely to fail. 
+ f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { + if waitingPod.GetPod().Namespace == pod.Namespace && flabel.GetPodGroupLabel(waitingPod.GetPod()) == pg.Name { + f.log.Info("PostFilter rejects the pod for podGroup %s and pod %s", pgName, waitingPod.GetPod().Name) + waitingPod.Reject(f.Name(), "optimistic rejection in PostFilter") + } + }) + + if f.pgBackoff != nil { + pods, err := f.frameworkHandler.SharedInformerFactory().Core().V1().Pods().Lister().Pods(pod.Namespace).List( + labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: flabel.GetPodGroupLabel(pod)}), + ) + if err == nil && len(pods) >= int(pg.Spec.MinMember) { + f.pgMgr.BackoffPodGroup(pgName, *f.pgBackoff) + } + } + + f.pgMgr.DeletePermittedPodGroup(pgName) + return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, + fmt.Sprintf("PodGroup %v gets rejected due to Pod %v is unschedulable even after PostFilter", pgName, pod.Name)) +} + +// Permit is the functions invoked by the framework at "Permit" extension point. +func (f *Fluence) Permit( + ctx context.Context, + state *framework.CycleState, + pod *corev1.Pod, + nodeName string, +) (*framework.Status, time.Duration) { + + f.log.Info("Checking permit for pod %s to node %s", pod.Name, nodeName) + waitTime := *f.scheduleTimeout + s := f.pgMgr.Permit(ctx, state, pod) + var retStatus *framework.Status + switch s { + case fcore.PodGroupNotSpecified: + f.log.Info("Checking permit for pod %s to node %s: PodGroupNotSpecified", pod.Name, nodeName) + return framework.NewStatus(framework.Success, ""), 0 + case fcore.PodGroupNotFound: + f.log.Info("Checking permit for pod %s to node %s: PodGroupNotFound", pod.Name, nodeName) + return framework.NewStatus(framework.Unschedulable, "PodGroup not found"), 0 + case fcore.Wait: + f.log.Info("Pod %s is waiting to be scheduled to node %s", pod.Name, nodeName) + _, pg := f.pgMgr.GetPodGroup(ctx, pod) + if wait := fgroup.GetWaitTimeDuration(pg, f.scheduleTimeout); wait != 0 { + waitTime = wait + } + retStatus = framework.NewStatus(framework.Wait) + + // We will also request to move the sibling pods back to activeQ. + f.pgMgr.ActivateSiblings(pod, state) + case fcore.Success: + pgFullName := flabel.GetPodGroupFullName(pod) + f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { + if flabel.GetPodGroupFullName(waitingPod.GetPod()) == pgFullName { + f.log.Info("Permit allows pod %s", waitingPod.GetPod().Name) + waitingPod.Allow(f.Name()) + } + }) + f.log.Info("Permit allows pod %s", pod.Name) + retStatus = framework.NewStatus(framework.Success) + waitTime = 0 + } + + return retStatus, waitTime +} + +// Reserve is the functions invoked by the framework at "reserve" extension point. +func (f *Fluence) Reserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) *framework.Status { + return nil +} + +// Unreserve rejects all other Pods in the PodGroup when one of the pods in the group times out. 
+func (f *Fluence) Unreserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) { + pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) + if pg == nil { + return + } + f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { + if waitingPod.GetPod().Namespace == pod.Namespace && flabel.GetPodGroupLabel(waitingPod.GetPod()) == pg.Name { + f.log.Info("Unreserve rejects pod %s in group %s", waitingPod.GetPod().Name, pgName) + waitingPod.Reject(f.Name(), "rejection in Unreserve") + } + }) + f.pgMgr.DeletePermittedPodGroup(pgName) +} diff --git a/sig-scheduler-plugins/pkg/fluence/group/group.go b/sig-scheduler-plugins/pkg/fluence/group/group.go index 0ee0831..dd039e3 100644 --- a/sig-scheduler-plugins/pkg/fluence/group/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group/group.go @@ -2,6 +2,7 @@ package group import ( "fmt" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -11,6 +12,9 @@ import ( sched "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" ) +// DefaultWaitTime is 60s if ScheduleTimeoutSeconds is not specified. +const DefaultWaitTime = 60 * time.Second + // CreateFakeGroup wraps an arbitrary pod in a fake group for fluence to schedule // This happens only in PreFilter so we already sorted func CreateFakeGroup(pod *corev1.Pod) *sched.PodGroup { @@ -44,3 +48,17 @@ func GetCreationTimestamp(groupName string, pg *sched.PodGroup, podInfo *framewo klog.Errorf(" [Fluence] Pod group %s time IsZero, we should not have reached here", groupName) return metav1.NewMicroTime(*podInfo.InitialAttemptTimestamp) } + +// GetWaitTimeDuration returns a wait timeout based on the following precedences: +// 1. spec.scheduleTimeoutSeconds of the given pg, if specified +// 2. given scheduleTimeout, if not nil +// 3. fall back to DefaultWaitTime +func GetWaitTimeDuration(pg *sched.PodGroup, scheduleTimeout *time.Duration) time.Duration { + if pg != nil && pg.Spec.ScheduleTimeoutSeconds != nil { + return time.Duration(*pg.Spec.ScheduleTimeoutSeconds) * time.Second + } + if scheduleTimeout != nil && *scheduleTimeout != 0 { + return *scheduleTimeout + } + return DefaultWaitTime +} diff --git a/sig-scheduler-plugins/pkg/logger/logger.go b/sig-scheduler-plugins/pkg/logger/logger.go index 053021a..d1e238e 100644 --- a/sig-scheduler-plugins/pkg/logger/logger.go +++ b/sig-scheduler-plugins/pkg/logger/logger.go @@ -79,8 +79,8 @@ func (l *DebugLogger) log(level int, prefix string, message ...any) error { rest := message[1:] // msg := fmt.Sprintf(message...) - fmt.Printf("Compariing level %d >= %d\n", level, l.level) - if level >= l.level { + fmt.Printf("Compariing level %d <= %d\n", level, l.level) + if level <= l.level { logger.Printf(prolog, rest...) } return l.Stop() From ef0ed50b1bcfefc30285024cff1c538f66ad62e2 Mon Sep 17 00:00:00 2001 From: vsoch Date: Fri, 19 Apr 2024 23:53:08 -0600 Subject: [PATCH 26/28] go: update to 1.21 Problem: we need to update to a newer go to keep up with the sig-scheduler upstream, and also the rainbow scheduler integration. Solution: upgrade to 1.21. This also required some refactor of the main.go and fluence due to changes in function signatures. This is a test to see if tests are passing - the fluxion-go bindings used here are from a branch (not merged yet) that can be used for the PR this one is going into, and before merging that final one we should merge and release the bindings more properly. 
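The "refactor of the main.go and fluence due to changes in function signatures" mostly means the plugin factory now receives a context.Context as its first argument. Below is a dependency-free sketch of that shape under stated assumptions: Handle, Plugin, and fluence are stand-ins for the real framework types, not the upstream API, and the body only hints at what the real constructor wires up.

```go
package main

import (
	"context"
	"fmt"
)

// Stand-ins for framework.Handle and framework.Plugin; the real interfaces
// live in k8s.io/kubernetes/pkg/scheduler/framework.
type Handle interface{}
type Plugin interface{ Name() string }

type fluence struct{}

func (f *fluence) Name() string { return "Fluence" }

// Old factory shape: New(obj runtime.Object, handle framework.Handle)
// New factory shape: the context now comes first, as below.
func New(ctx context.Context, obj interface{}, handle Handle) (Plugin, error) {
	_ = ctx // the real plugin threads this into informers and the pod group manager
	_ = obj
	_ = handle
	return &fluence{}, nil
}

func main() {
	plugin, err := New(context.TODO(), nil, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(plugin.Name())
}
```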
Signed-off-by: vsoch --- sig-scheduler-plugins/cmd/controller/app/server.go | 8 +++++--- sig-scheduler-plugins/pkg/fluence/core/core.go | 1 + sig-scheduler-plugins/pkg/fluence/fluence.go | 5 +++-- src/build/scheduler/Dockerfile | 4 ++-- src/fluence/go.mod | 4 ++-- src/fluence/go.sum | 2 ++ 6 files changed, 15 insertions(+), 9 deletions(-) diff --git a/sig-scheduler-plugins/cmd/controller/app/server.go b/sig-scheduler-plugins/cmd/controller/app/server.go index d42c0f4..aae8625 100644 --- a/sig-scheduler-plugins/cmd/controller/app/server.go +++ b/sig-scheduler-plugins/cmd/controller/app/server.go @@ -27,6 +27,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" api "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1" "sigs.k8s.io/scheduler-plugins/pkg/controllers" ) @@ -50,9 +51,10 @@ func Run(s *ServerRunOptions) error { // Controller Runtime Controllers ctrl.SetLogger(klogr.New()) mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ - Scheme: scheme, - MetricsBindAddress: s.MetricsAddr, - Port: 9443, + Scheme: scheme, + Metrics: metricsserver.Options{ + BindAddress: s.MetricsAddr, + }, HealthProbeBindAddress: s.ProbeAddr, LeaderElection: s.EnableLeaderElection, LeaderElectionID: "sched-plugins-controllers", diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index ea300ce..7f1e052 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -293,6 +293,7 @@ func (pgMgr *PodGroupManager) PreFilter( // it may not necessarily pass Filter due to other constraints such as affinity/taints. _, ok := pgMgr.permittedPG.Get(pgFullName) if ok { + pgMgr.log.Info("[PodGroup PreFilter] Pod Group %s is already admitted", pgFullName) return nil } diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index 099d2f3..fe113d6 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -67,7 +67,8 @@ var ( _ framework.EnqueueExtensions = &Fluence{} - permitWaitingTimeSeconds int64 = 60 + // Set to be the same as coscheduling + permitWaitingTimeSeconds int64 = 300 podGroupBackoffSeconds int64 = 0 ) @@ -77,7 +78,7 @@ const ( ) // Initialize and return a new Fluence Custom Scheduler Plugin -func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error) { +func New(_ context.Context, obj runtime.Object, handle framework.Handle) (framework.Plugin, error) { ctx := context.TODO() diff --git a/src/build/scheduler/Dockerfile b/src/build/scheduler/Dockerfile index 67bd5ce..2a8892c 100644 --- a/src/build/scheduler/Dockerfile +++ b/src/build/scheduler/Dockerfile @@ -2,11 +2,11 @@ FROM fluxrm/flux-sched:jammy USER root ENV DEBIAN_FRONTEND=noninteractive -ENV GO_VERSION=1.19.10 +ENV GO_VERSION=1.21.9 RUN apt-get update && apt-get clean -y && apt -y autoremove -# Install go 19.10 +# Install go RUN wget https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz && tar -xvf go${GO_VERSION}.linux-amd64.tar.gz && \ mv go /usr/local && rm go${GO_VERSION}.linux-amd64.tar.gz diff --git a/src/fluence/go.mod b/src/fluence/go.mod index 5c57652..01fc126 100644 --- a/src/fluence/go.mod +++ b/src/fluence/go.mod @@ -1,9 +1,9 @@ module github.com/flux-framework/flux-k8s/flux-plugin/fluence -go 1.19 +go 1.21 require ( - github.com/flux-framework/fluxion-go v0.32.0 + github.com/flux-framework/fluxion-go 
v0.32.1-0.20240420052153-909523c84ca2 google.golang.org/grpc v1.38.0 google.golang.org/protobuf v1.26.0 gopkg.in/yaml.v2 v2.4.0 diff --git a/src/fluence/go.sum b/src/fluence/go.sum index 5700215..534497d 100644 --- a/src/fluence/go.sum +++ b/src/fluence/go.sum @@ -100,6 +100,8 @@ github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5Kwzbycv github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/flux-framework/fluxion-go v0.32.0 h1:NY6Y1mlTTTZhHD+CmAsDsdNTxUsAFDQoORpMZj8NFLI= github.com/flux-framework/fluxion-go v0.32.0/go.mod h1:ZI3QxSvUfgJE2Snur/SntJmVfpMjr6D4ICVmdqJ9fkQ= +github.com/flux-framework/fluxion-go v0.32.1-0.20240420052153-909523c84ca2 h1:Yz/vVX0XfB2q51ZLh2p8YI5vphvv0rZF4PqtKPscvsY= +github.com/flux-framework/fluxion-go v0.32.1-0.20240420052153-909523c84ca2/go.mod h1:jA5+kOSLxchFzixzYEvMAGjkXB5yszO/HxUwdhX/5/U= github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= From 3bd9cb508bb515ba4bbadde2a9b528550c110209 Mon Sep 17 00:00:00 2001 From: vsoch Date: Thu, 2 May 2024 18:42:36 -0600 Subject: [PATCH 27/28] naming: expand short named variables Problem: a lot of the variables with pg are hard to understand Solution: write out podGroup or groupName explicitly. Signed-off-by: vsoch --- README.md | 2 +- hack/quick-build-kind.sh | 36 +++++ hack/quick-build.sh | 22 +-- .../pkg/fluence/core/core.go | 143 +++++++++--------- .../pkg/fluence/core/flux.go | 134 ++++++++-------- sig-scheduler-plugins/pkg/fluence/fluence.go | 76 +++++----- .../pkg/fluence/group/group.go | 18 +-- .../pkg/fluence/labels/labels.go | 6 +- 8 files changed, 226 insertions(+), 211 deletions(-) create mode 100755 hack/quick-build-kind.sh diff --git a/README.md b/README.md index ff3327e..515d313 100644 --- a/README.md +++ b/README.md @@ -503,7 +503,7 @@ I was having trouble developing this easily because it's a lot of steps to build The last step ensures we use the images we loaded! You can basically just do: ```bash -/bin/bash ./hack/quick-build.sh +/bin/bash ./hack/quick-build-kind.sh ``` This sped up my development time immensely. If you want to manually do the steps, see that script for instructions. diff --git a/hack/quick-build-kind.sh b/hack/quick-build-kind.sh new file mode 100755 index 0000000..23a5c87 --- /dev/null +++ b/hack/quick-build-kind.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Before running this, you should: +# 1. create the kind cluster (needs more than one node, fluence does not scheduler to the control plane) +# 2. Install cert-manager +# 3. Customize the script to point to your registry if you intend to push + +REGISTRY="${1:-ghcr.io/vsoch}" +HERE=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +ROOT=$(dirname ${HERE}) + +# Go to the script directory +cd ${ROOT} + +# These build each of the images. 
The sidecar is separate from the other two in src/ +make REGISTRY=${REGISTRY} SCHEDULER_IMAGE=fluence SIDECAR_IMAGE=fluence-sidecar CONTROLLER_IMAGE=fluence-controller + +# This is what it might look like to push +# docker push ghcr.io/vsoch/fluence-sidecar && docker push ghcr.io/vsoch/fluence-controller && docker push ghcr.io/vsoch/fluence:latest + +# We load into kind so we don't need to push/pull and use up internet data ;) +kind load docker-image ${REGISTRY}/fluence-sidecar:latest +kind load docker-image ${REGISTRY}/fluence-controller:latest +kind load docker-image ${REGISTRY}/fluence:latest + +# And then install using the charts. The pull policy ensures we use the loaded ones +cd ${ROOT}/upstream/manifests/install/charts +helm uninstall fluence || true +helm install \ + --set scheduler.image=${REGISTRY}/fluence:latest \ + --set scheduler.sidecarPullPolicy=Never \ + --set scheduler.pullPolicy=Never \ + --set controller.pullPolicy=Never \ + --set controller.image=${REGISTRY}/fluence-controller:latest \ + --set scheduler.sidecarimage=${REGISTRY}/fluence-sidecar:latest \ + fluence as-a-second-scheduler/ diff --git a/hack/quick-build.sh b/hack/quick-build.sh index 23a5c87..c9b8eff 100755 --- a/hack/quick-build.sh +++ b/hack/quick-build.sh @@ -13,24 +13,4 @@ ROOT=$(dirname ${HERE}) cd ${ROOT} # These build each of the images. The sidecar is separate from the other two in src/ -make REGISTRY=${REGISTRY} SCHEDULER_IMAGE=fluence SIDECAR_IMAGE=fluence-sidecar CONTROLLER_IMAGE=fluence-controller - -# This is what it might look like to push -# docker push ghcr.io/vsoch/fluence-sidecar && docker push ghcr.io/vsoch/fluence-controller && docker push ghcr.io/vsoch/fluence:latest - -# We load into kind so we don't need to push/pull and use up internet data ;) -kind load docker-image ${REGISTRY}/fluence-sidecar:latest -kind load docker-image ${REGISTRY}/fluence-controller:latest -kind load docker-image ${REGISTRY}/fluence:latest - -# And then install using the charts. The pull policy ensures we use the loaded ones -cd ${ROOT}/upstream/manifests/install/charts -helm uninstall fluence || true -helm install \ - --set scheduler.image=${REGISTRY}/fluence:latest \ - --set scheduler.sidecarPullPolicy=Never \ - --set scheduler.pullPolicy=Never \ - --set controller.pullPolicy=Never \ - --set controller.image=${REGISTRY}/fluence-controller:latest \ - --set scheduler.sidecarimage=${REGISTRY}/fluence-sidecar:latest \ - fluence as-a-second-scheduler/ +make REGISTRY=${REGISTRY} SCHEDULER_IMAGE=fluence SIDECAR_IMAGE=fluence-sidecar CONTROLLER_IMAGE=fluence-controller \ No newline at end of file diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index 7f1e052..a74e749 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -95,10 +95,10 @@ type PodGroupManager struct { // scheduleTimeout is the default timeout for podgroup scheduling. // If podgroup's scheduleTimeoutSeconds is set, it will be used. scheduleTimeout *time.Duration - // permittedPG stores the podgroup name which has passed the pre resource check. - permittedPG *gochache.Cache - // backedOffPG stores the podgorup name which failed scheduling recently. - backedOffPG *gochache.Cache + // permittedpodGroup stores the podgroup name which has passed the pre resource check. + permittedpodGroup *gochache.Cache + // backedOffpodGroup stores the podgorup name which failed scheduling recently. 
+ backedOffpodGroup *gochache.Cache // podLister is pod lister podLister listerv1.PodLister @@ -122,32 +122,32 @@ func NewPodGroupManager( podInformer informerv1.PodInformer, log *logger.DebugLogger, ) *PodGroupManager { - pgMgr := &PodGroupManager{ + podGroupManager := &PodGroupManager{ client: client, snapshotSharedLister: snapshotSharedLister, scheduleTimeout: scheduleTimeout, podLister: podInformer.Lister(), - permittedPG: gochache.New(3*time.Second, 3*time.Second), - backedOffPG: gochache.New(10*time.Second, 10*time.Second), + permittedpodGroup: gochache.New(3*time.Second, 3*time.Second), + backedOffpodGroup: gochache.New(10*time.Second, 10*time.Second), groupToJobId: map[string]uint64{}, podToNode: map[string]string{}, log: log, } - return pgMgr + return podGroupManager } -func (pgMgr *PodGroupManager) BackoffPodGroup(pgName string, backoff time.Duration) { +func (podGroupManager *PodGroupManager) BackoffPodGroup(groupName string, backoff time.Duration) { if backoff == time.Duration(0) { return } - pgMgr.backedOffPG.Add(pgName, nil, backoff) + podGroupManager.backedOffpodGroup.Add(groupName, nil, backoff) } // ActivateSiblings stashes the pods belonging to the same PodGroup of the given pod // in the given state, with a reserved key "kubernetes.io/pods-to-activate". -func (pgMgr *PodGroupManager) ActivateSiblings(pod *corev1.Pod, state *framework.CycleState) { - pgName := util.GetPodGroupLabel(pod) - if pgName == "" { +func (podGroupManager *PodGroupManager) ActivateSiblings(pod *corev1.Pod, state *framework.CycleState) { + groupName := util.GetPodGroupLabel(pod) + if groupName == "" { return } @@ -158,11 +158,11 @@ func (pgMgr *PodGroupManager) ActivateSiblings(pod *corev1.Pod, state *framework return } - pods, err := pgMgr.podLister.Pods(pod.Namespace).List( - labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: pgName}), + pods, err := podGroupManager.podLister.Pods(pod.Namespace).List( + labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: groupName}), ) if err != nil { - klog.ErrorS(err, "Failed to obtain pods belong to a PodGroup", "podGroup", pgName) + klog.ErrorS(err, "Failed to obtain pods belong to a PodGroup", "podGroup", groupName) return } @@ -188,40 +188,39 @@ func (pgMgr *PodGroupManager) ActivateSiblings(pod *corev1.Pod, state *framework } // GetStatuses string (of all pods) to show for debugging purposes -func (pgMgr *PodGroupManager) GetStatuses( +func (podGroupManager *PodGroupManager) GetStatuses( pods []*corev1.Pod, - pod *corev1.Pod, ) string { statuses := "" // We need to distinguish 0 from the default and not finding anything - for _, p := range pods { - statuses += " " + fmt.Sprintf("%s", p.Status.Phase) + for _, pod := range pods { + statuses += " " + fmt.Sprintf("%s", pod.Status.Phase) } return statuses } // GetPodNode is a quick lookup to see if we have a node -func (pgMgr *PodGroupManager) GetPodNode(pod *corev1.Pod) string { - node, _ := pgMgr.podToNode[pod.Name] +func (podGroupManager *PodGroupManager) GetPodNode(pod *corev1.Pod) string { + node, _ := podGroupManager.podToNode[pod.Name] return node } // Permit permits a pod to run, if the minMember match, it would send a signal to chan. 
-func (pgMgr *PodGroupManager) Permit(ctx context.Context, state *framework.CycleState, pod *corev1.Pod) Status { - pgFullName, pg := pgMgr.GetPodGroup(ctx, pod) - if pgFullName == "" { +func (podGroupManager *PodGroupManager) Permit(ctx context.Context, state *framework.CycleState, pod *corev1.Pod) Status { + groupName, podGroup := podGroupManager.GetPodGroup(ctx, pod) + if groupName == "" { return PodGroupNotSpecified } - if pg == nil { + if podGroup == nil { // A Pod with a podGroup name but without a PodGroup found is denied. return PodGroupNotFound } - assigned := pgMgr.CalculateAssignedPods(pg.Name, pg.Namespace) + assigned := podGroupManager.CalculateAssignedPods(podGroup.Name, podGroup.Namespace) // The number of pods that have been assigned nodes is calculated from the snapshot. // The current pod in not included in the snapshot during the current scheduling cycle. - if int32(assigned)+1 >= pg.Spec.MinMember { + if int32(assigned)+1 >= podGroup.Spec.MinMember { return Success } @@ -244,24 +243,24 @@ func (pgMgr *PodGroupManager) Permit(ctx context.Context, state *framework.Cycle // 1. it belongs to a podgroup that was recently denied or // 2. the total number of pods in the podgroup is less than the minimum number of pods // that is required to be scheduled. -func (pgMgr *PodGroupManager) PreFilter( +func (podGroupManager *PodGroupManager) PreFilter( ctx context.Context, pod *corev1.Pod, state *framework.CycleState, ) error { - pgMgr.log.Info("[PodGroup PreFilter] pod %s", klog.KObj(pod)) - pgFullName, pg := pgMgr.GetPodGroup(ctx, pod) - if pg == nil { + podGroupManager.log.Info("[PodGroup PreFilter] pod %s", klog.KObj(pod)) + groupName, podGroup := podGroupManager.GetPodGroup(ctx, pod) + if podGroup == nil { return nil } - _, exist := pgMgr.backedOffPG.Get(pgFullName) + _, exist := podGroupManager.backedOffpodGroup.Get(groupName) if exist { - return fmt.Errorf("podGroup %v failed recently", pgFullName) + return fmt.Errorf("podGroup %v failed recently", groupName) } - pods, err := pgMgr.podLister.Pods(pod.Namespace).List( + pods, err := podGroupManager.podLister.Pods(pod.Namespace).List( labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: util.GetPodGroupLabel(pod)}), ) if err != nil { @@ -271,18 +270,18 @@ func (pgMgr *PodGroupManager) PreFilter( // Only allow scheduling the first in the group so the others come after // Get statuses to show for debugging - statuses := pgMgr.GetStatuses(pods, pod) + statuses := podGroupManager.GetStatuses(pods) // This shows us the number of pods we have in the set and their states - pgMgr.log.Info("[PodGroup PreFilter] group: %s pods: %s MinMember: %d Size: %d", pgFullName, statuses, pg.Spec.MinMember, len(pods)) - if len(pods) < int(pg.Spec.MinMember) { + podGroupManager.log.Info("[PodGroup PreFilter] group: %s pods: %s MinMember: %d Size: %d", groupName, statuses, podGroup.Spec.MinMember, len(pods)) + if len(pods) < int(podGroup.Spec.MinMember) { return fmt.Errorf("pre-filter pod %v cannot find enough sibling pods, "+ - "current pods number: %v, minMember of group: %v", pod.Name, len(pods), pg.Spec.MinMember) + "current pods number: %v, minMember of group: %v", pod.Name, len(pods), podGroup.Spec.MinMember) } // TODO we likely can take advantage of these resources or other custom // attributes we add. 
For now ignore and calculate based on pod needs (above) - // if pg.Spec.MinResources == nil { + // if podGroup.Spec.MinResources == nil { // fmt.Printf("Fluence Min resources are null, skipping PreFilter") // return nil // } @@ -291,28 +290,28 @@ func (pgMgr *PodGroupManager) PreFilter( // TODO(cwdsuzhou): This resource check may not always pre-catch unschedulable pod group. // It only tries to PreFilter resource constraints so even if a PodGroup passed here, // it may not necessarily pass Filter due to other constraints such as affinity/taints. - _, ok := pgMgr.permittedPG.Get(pgFullName) + _, ok := podGroupManager.permittedpodGroup.Get(groupName) if ok { - pgMgr.log.Info("[PodGroup PreFilter] Pod Group %s is already admitted", pgFullName) + podGroupManager.log.Info("[PodGroup PreFilter] Pod Group %s is already admitted", groupName) return nil } - // TODO: right now we ask Fluxion for a podspec based on ONE pod, but + // TODO: right now we ask Fluxion for a podspec based on ONE representative pod, but // we have the whole group! We can handle different pod needs now :) repPod := pods[0] - nodes, err := pgMgr.AskFlux(ctx, *repPod, pg, pgFullName) + nodes, err := podGroupManager.AskFlux(ctx, *repPod, podGroup, groupName) if err != nil { - pgMgr.log.Info("[PodGroup PreFilter] Fluxion returned an error %s, not schedulable", err.Error()) + podGroupManager.log.Info("[PodGroup PreFilter] Fluxion returned an error %s, not schedulable", err.Error()) return err } - pgMgr.log.Info("Node Selected %s (pod group %s)", nodes, pgFullName) + podGroupManager.log.Info("Node Selected %s (pod group %s)", nodes, groupName) // Some reason fluxion gave us the wrong size? if len(nodes) != len(pods) { - pgMgr.log.Warning("[PodGroup PreFilter] group %s needs %d nodes but Fluxion returned the wrong number nodes %d.", pgFullName, len(pods), len(nodes)) - pgMgr.mutex.Lock() - pgMgr.cancelFluxJob(pgFullName, repPod) - pgMgr.mutex.Unlock() + podGroupManager.log.Warning("[PodGroup PreFilter] group %s needs %d nodes but Fluxion returned the wrong number nodes %d.", groupName, len(pods), len(nodes)) + podGroupManager.mutex.Lock() + podGroupManager.cancelFluxJob(groupName, repPod) + podGroupManager.mutex.Unlock() } // Create a fluxState (CycleState) with all nodes - this is used to retrieve @@ -324,32 +323,32 @@ func (pgMgr *PodGroupManager) PreFilter( stateData := FluxStateData{NodeName: node} state.Write(framework.StateKey(pod.Name), &stateData) // Also save to the podToNode lookup - pgMgr.mutex.Lock() - pgMgr.podToNode[pod.Name] = node - pgMgr.mutex.Unlock() + podGroupManager.mutex.Lock() + podGroupManager.podToNode[pod.Name] = node + podGroupManager.mutex.Unlock() } - pgMgr.permittedPG.Add(pgFullName, pgFullName, *pgMgr.scheduleTimeout) + podGroupManager.permittedpodGroup.Add(groupName, groupName, *podGroupManager.scheduleTimeout) return nil } // GetCreationTimestamp returns the creation time of a podGroup or a pod. 
-func (pgMgr *PodGroupManager) GetCreationTimestamp(pod *corev1.Pod, ts time.Time) time.Time { - pgName := util.GetPodGroupLabel(pod) - if len(pgName) == 0 { +func (podGroupManager *PodGroupManager) GetCreationTimestamp(pod *corev1.Pod, ts time.Time) time.Time { + groupName := util.GetPodGroupLabel(pod) + if len(groupName) == 0 { return ts } - var pg v1alpha1.PodGroup - if err := pgMgr.client.Get(context.TODO(), types.NamespacedName{Namespace: pod.Namespace, Name: pgName}, &pg); err != nil { + var podGroup v1alpha1.PodGroup + if err := podGroupManager.client.Get(context.TODO(), types.NamespacedName{Namespace: pod.Namespace, Name: groupName}, &podGroup); err != nil { return ts } - return pg.CreationTimestamp.Time + return podGroup.CreationTimestamp.Time } // CalculateAssignedPods returns the number of pods that has been assigned nodes: assumed or bound. -func (pgMgr *PodGroupManager) CalculateAssignedPods(podGroupName, namespace string) int { - nodeInfos, err := pgMgr.snapshotSharedLister.NodeInfos().List() +func (podGroupManager *PodGroupManager) CalculateAssignedPods(podGroupName, namespace string) int { + nodeInfos, err := podGroupManager.snapshotSharedLister.NodeInfos().List() if err != nil { - pgMgr.log.Error("Cannot get nodeInfos from frameworkHandle: %s", err) + podGroupManager.log.Error("Cannot get nodeInfos from frameworkHandle: %s", err) return 0 } var count int @@ -365,21 +364,21 @@ func (pgMgr *PodGroupManager) CalculateAssignedPods(podGroupName, namespace stri } // DeletePermittedPodGroup deletes a podGroup that passes Pre-Filter but reaches PostFilter. -func (pgMgr *PodGroupManager) DeletePermittedPodGroup(pgFullName string) { - pgMgr.permittedPG.Delete(pgFullName) +func (podGroupManager *PodGroupManager) DeletePermittedPodGroup(groupName string) { + podGroupManager.permittedpodGroup.Delete(groupName) } // GetPodGroup returns the PodGroup that a Pod belongs to in cache. -func (pgMgr *PodGroupManager) GetPodGroup(ctx context.Context, pod *corev1.Pod) (string, *v1alpha1.PodGroup) { - pgName := util.GetPodGroupLabel(pod) - if len(pgName) == 0 { +func (podGroupManager *PodGroupManager) GetPodGroup(ctx context.Context, pod *corev1.Pod) (string, *v1alpha1.PodGroup) { + groupName := util.GetPodGroupLabel(pod) + if len(groupName) == 0 { return "", nil } - var pg v1alpha1.PodGroup - if err := pgMgr.client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: pgName}, &pg); err != nil { - return fmt.Sprintf("%v/%v", pod.Namespace, pgName), nil + var podGroup v1alpha1.PodGroup + if err := podGroupManager.client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: groupName}, &podGroup); err != nil { + return fmt.Sprintf("%v/%v", pod.Namespace, groupName), nil } - return fmt.Sprintf("%v/%v", pod.Namespace, pgName), &pg + return fmt.Sprintf("%v/%v", pod.Namespace, groupName), &podGroup } // GetNamespacedName returns the namespaced name. diff --git a/sig-scheduler-plugins/pkg/fluence/core/flux.go b/sig-scheduler-plugins/pkg/fluence/core/flux.go index 48e1500..50c8ff1 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/flux.go +++ b/sig-scheduler-plugins/pkg/fluence/core/flux.go @@ -17,17 +17,17 @@ import ( // AskFlux will ask flux for an allocation for nodes for the pod group. // We return the list of nodes, and assign to the entire group! 
-func (pgMgr *PodGroupManager) AskFlux( +func (podGroupManager *PodGroupManager) AskFlux( ctx context.Context, pod corev1.Pod, - pg *v1alpha1.PodGroup, + podGroup *v1alpha1.PodGroup, groupName string, ) ([]string, error) { // clean up previous match if a pod has already allocated previously - pgMgr.mutex.Lock() - _, isAllocated := pgMgr.groupToJobId[groupName] - pgMgr.mutex.Unlock() + podGroupManager.mutex.Lock() + _, isAllocated := podGroupManager.groupToJobId[groupName] + podGroupManager.mutex.Unlock() // This case happens when there is some reason that an initial job pods partially allocated, // but then the job restarted, and new pods are present but fluence had assigned nodes to @@ -37,10 +37,10 @@ func (pgMgr *PodGroupManager) AskFlux( // cancel in fluence. What we can do here is assume the previous pods are no longer running // and cancel the flux job to create again. if isAllocated { - pgMgr.log.Warning("[PodGroup AskFlux] group %s was previously allocated and is requesting again, so must have completed.", groupName) - pgMgr.mutex.Lock() - pgMgr.cancelFluxJob(groupName, &pod) - pgMgr.mutex.Unlock() + podGroupManager.log.Warning("[PodGroup AskFlux] group %s was previously allocated and is requesting again, so must have completed.", groupName) + podGroupManager.mutex.Lock() + podGroupManager.cancelFluxJob(groupName, &pod) + podGroupManager.mutex.Unlock() } nodes := []string{} @@ -48,12 +48,12 @@ func (pgMgr *PodGroupManager) AskFlux( // This obviously may not be true if we have a hetereogenous PodGroup. // We name it based on the group, since it will represent the group jobspec := utils.PreparePodJobSpec(&pod, groupName) - pgMgr.log.Info("[PodGroup AskFlux] Inspect pod info, jobspec: %s\n", jobspec) + podGroupManager.log.Info("[PodGroup AskFlux] Inspect pod info, jobspec: %s\n", jobspec) conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) // TODO change this to just return fmt.Errorf if err != nil { - pgMgr.log.Error("[PodGroup AskFlux] Error connecting to server: %v\n", err) + podGroupManager.log.Error("[PodGroup AskFlux] Error connecting to server: %v\n", err) return nodes, err } defer conn.Close() @@ -65,18 +65,18 @@ func (pgMgr *PodGroupManager) AskFlux( request := &pb.MatchRequest{ Ps: jobspec, Request: "allocate", - Count: pg.Spec.MinMember, + Count: podGroup.Spec.MinMember, } // An error here is an error with making the request r, err := grpcclient.Match(context.Background(), request) if err != nil { - pgMgr.log.Warning("[PodGroup AskFlux] did not receive any match response: %v\n", err) + podGroupManager.log.Warning("[PodGroup AskFlux] did not receive any match response: %v\n", err) return nodes, err } // TODO GetPodID should be renamed, because it will reflect the group - pgMgr.log.Info("[PodGroup AskFlux] Match response ID %s\n", r.GetPodID()) + podGroupManager.log.Info("[PodGroup AskFlux] Match response ID %s\n", r.GetPodID()) // Get the nodelist and inspect nodelist := r.GetNodelist() @@ -84,33 +84,33 @@ func (pgMgr *PodGroupManager) AskFlux( nodes = append(nodes, node.NodeID) } jobid := uint64(r.GetJobID()) - pgMgr.log.Info("[PodGroup AskFlux] parsed node pods list %s for job id %d\n", nodes, jobid) + podGroupManager.log.Info("[PodGroup AskFlux] parsed node pods list %s for job id %d\n", nodes, jobid) // TODO would be nice to actually be able to ask flux jobs -a to fluence // That way we can verify assignments, etc. 
- pgMgr.mutex.Lock() - pgMgr.groupToJobId[groupName] = jobid - pgMgr.mutex.Unlock() + podGroupManager.mutex.Lock() + podGroupManager.groupToJobId[groupName] = jobid + podGroupManager.mutex.Unlock() return nodes, nil } // cancelFluxJobForPod cancels the flux job for a pod. // We assume that the cancelled job also means deleting the pod group -func (pgMgr *PodGroupManager) cancelFluxJob(groupName string, pod *corev1.Pod) error { +func (podGroupManager *PodGroupManager) cancelFluxJob(groupName string, pod *corev1.Pod) error { - jobid, ok := pgMgr.groupToJobId[groupName] + jobid, ok := podGroupManager.groupToJobId[groupName] // The job was already cancelled by another pod if !ok { - pgMgr.log.Info("[PodGroup cancelFluxJob] Request for cancel of group %s is already complete.", groupName) + podGroupManager.log.Info("[PodGroup cancelFluxJob] Request for cancel of group %s is already complete.", groupName) return nil } - pgMgr.log.Info("[PodGroup cancelFluxJob] Cancel flux job: %v for group %s", jobid, groupName) + podGroupManager.log.Info("[PodGroup cancelFluxJob] Cancel flux job: %v for group %s", jobid, groupName) // This first error is about connecting to the server conn, err := grpc.Dial("127.0.0.1:4242", grpc.WithInsecure()) if err != nil { - pgMgr.log.Error("[PodGroup cancelFluxJob] Error connecting to server: %v", err) + podGroupManager.log.Error("[PodGroup cancelFluxJob] Error connecting to server: %v", err) return err } defer conn.Close() @@ -123,28 +123,28 @@ func (pgMgr *PodGroupManager) cancelFluxJob(groupName string, pod *corev1.Pod) e request := &pb.CancelRequest{JobID: int64(jobid)} res, err := grpcclient.Cancel(context.Background(), request) if err != nil { - pgMgr.log.Error("[PodGroup cancelFluxJob] did not receive any cancel response: %v", err) + podGroupManager.log.Error("[PodGroup cancelFluxJob] did not receive any cancel response: %v", err) return err } - pgMgr.log.Info("[PodGroup cancelFluxJob] Job cancellation for group %s result: %d", groupName, res.Error) + podGroupManager.log.Info("[PodGroup cancelFluxJob] Job cancellation for group %s result: %d", groupName, res.Error) // And this error is if the cancel was successful or not if res.Error == 0 { - pgMgr.log.Info("[PodGroup cancelFluxJob] Successful cancel of flux job: %d for group %s", jobid, groupName) - pgMgr.cleanup(pod, groupName) + podGroupManager.log.Info("[PodGroup cancelFluxJob] Successful cancel of flux job: %d for group %s", jobid, groupName) + podGroupManager.cleanup(pod, groupName) } else { - pgMgr.log.Warning("[PodGroup cancelFluxJob] Failed to cancel flux job %d for group %s", jobid, groupName) + podGroupManager.log.Warning("[PodGroup cancelFluxJob] Failed to cancel flux job %d for group %s", jobid, groupName) } return nil } // cleanup deletes the group name from groupToJobId, and pods names from the node lookup -func (pgMgr *PodGroupManager) cleanup(pod *corev1.Pod, groupName string) { +func (podGroupManager *PodGroupManager) cleanup(pod *corev1.Pod, groupName string) { - delete(pgMgr.groupToJobId, groupName) + delete(podGroupManager.groupToJobId, groupName) // Clean up previous pod->node assignments - pods, err := pgMgr.podLister.Pods(pod.Namespace).List( + pods, err := podGroupManager.podLister.Pods(pod.Namespace).List( labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: groupName}), ) // TODO need to handle this / understand why it's the case @@ -152,28 +152,28 @@ func (pgMgr *PodGroupManager) cleanup(pod *corev1.Pod, groupName string) { return } for _, pod := range pods { - 
delete(pgMgr.podToNode, pod.Name) + delete(podGroupManager.podToNode, pod.Name) } } // UpdatePod is called on an update, and the old and new object are presented -func (pgMgr *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { +func (podGroupManager *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { oldPod := oldObj.(*corev1.Pod) newPod := newObj.(*corev1.Pod) // a pod is updated, get the group // TODO should we be checking group / size for old vs new? - groupName, pg := pgMgr.GetPodGroup(context.TODO(), oldPod) + groupName, podGroup := podGroupManager.GetPodGroup(context.TODO(), oldPod) // If PodGroup is nil, still try to look up a faux name // TODO need to check if this might be problematic - if pg == nil { - pg = fgroup.CreateFakeGroup(oldPod) - groupName = pg.Name + if podGroup == nil { + podGroup = fgroup.CreateFakeGroup(oldPod) + groupName = podGroup.Name } - pgMgr.log.Verbose("[PodGroup UpdatePod] Processing event for pod %s in group %s from %s to %s", newPod.Name, groupName, oldPod.Status.Phase, newPod.Status.Phase) + podGroupManager.log.Verbose("[PodGroup UpdatePod] Processing event for pod %s in group %s from %s to %s", newPod.Name, groupName, oldPod.Status.Phase, newPod.Status.Phase) switch newPod.Status.Phase { case corev1.PodPending: @@ -181,34 +181,34 @@ func (pgMgr *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { case corev1.PodRunning: // if a pod is start running, we can add it state to the delta graph if it is scheduled by other scheduler case corev1.PodSucceeded: - pgMgr.log.Info("[PodGroup UpdatePod] Pod %s succeeded, Fluence needs to free the resources", newPod.Name) + podGroupManager.log.Info("[PodGroup UpdatePod] Pod %s succeeded, Fluence needs to free the resources", newPod.Name) - pgMgr.mutex.Lock() - defer pgMgr.mutex.Unlock() + podGroupManager.mutex.Lock() + defer podGroupManager.mutex.Unlock() // Do we have the group id in our cache? 
If yes, we haven't deleted the jobid yet // I am worried here that if some pods are succeeded and others pending, this could // be a mistake - fluence would schedule it again - _, ok := pgMgr.groupToJobId[groupName] + _, ok := podGroupManager.groupToJobId[groupName] if ok { - pgMgr.cancelFluxJob(groupName, oldPod) + podGroupManager.cancelFluxJob(groupName, oldPod) } else { - pgMgr.log.Verbose("[PodGroup UpdatePod] Succeeded pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) + podGroupManager.log.Verbose("[PodGroup UpdatePod] Succeeded pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) } case corev1.PodFailed: // a corner case need to be tested, the pod exit code is not 0, can be created with segmentation fault pi test - pgMgr.log.Warning("[PodGroup UpdatePod] Pod %s in group %s failed, Fluence needs to free the resources", newPod.Name, groupName) + podGroupManager.log.Warning("[PodGroup UpdatePod] Pod %s in group %s failed, Fluence needs to free the resources", newPod.Name, groupName) - pgMgr.mutex.Lock() - defer pgMgr.mutex.Unlock() + podGroupManager.mutex.Lock() + defer podGroupManager.mutex.Unlock() - _, ok := pgMgr.groupToJobId[groupName] + _, ok := podGroupManager.groupToJobId[groupName] if ok { - pgMgr.cancelFluxJob(groupName, oldPod) + podGroupManager.cancelFluxJob(groupName, oldPod) } else { - pgMgr.log.Error("[PodGroup UpdatePod] Failed pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) + podGroupManager.log.Error("[PodGroup UpdatePod] Failed pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) } case corev1.PodUnknown: // don't know how to deal with it as it's unknown phase @@ -218,40 +218,40 @@ func (pgMgr *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { } // DeletePod handles the delete event handler -func (pgMgr *PodGroupManager) DeletePod(podObj interface{}) { +func (podGroupManager *PodGroupManager) DeletePod(podObj interface{}) { pod := podObj.(*corev1.Pod) - groupName, pg := pgMgr.GetPodGroup(context.TODO(), pod) + groupName, podGroup := podGroupManager.GetPodGroup(context.TODO(), pod) // If PodGroup is nil, still try to look up a faux name - if pg == nil { - pg = fgroup.CreateFakeGroup(pod) - groupName = pg.Name + if podGroup == nil { + podGroup = fgroup.CreateFakeGroup(pod) + groupName = podGroup.Name } - pgMgr.log.Verbose("[PodGroup DeletePod] Delete pod %s in group %s has status %s", pod.Status.Phase, pod.Name, groupName) + podGroupManager.log.Verbose("[PodGroup DeletePod] Delete pod %s in group %s has status %s", pod.Status.Phase, pod.Name, groupName) switch pod.Status.Phase { case corev1.PodSucceeded: case corev1.PodPending: - pgMgr.log.Verbose("[PodGroup DeletePod] Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) + podGroupManager.log.Verbose("[PodGroup DeletePod] Pod %s completed and is Pending termination, Fluence needs to free the resources", pod.Name) - pgMgr.mutex.Lock() - defer pgMgr.mutex.Unlock() + podGroupManager.mutex.Lock() + defer podGroupManager.mutex.Unlock() - _, ok := pgMgr.groupToJobId[groupName] + _, ok := podGroupManager.groupToJobId[groupName] if ok { - pgMgr.cancelFluxJob(groupName, pod) + podGroupManager.cancelFluxJob(groupName, pod) } else { - pgMgr.log.Info("[PodGroup DeletePod] Terminating pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) + podGroupManager.log.Info("[PodGroup DeletePod] Terminating pod 
%s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) } case corev1.PodRunning: - pgMgr.mutex.Lock() - defer pgMgr.mutex.Unlock() + podGroupManager.mutex.Lock() + defer podGroupManager.mutex.Unlock() - _, ok := pgMgr.groupToJobId[groupName] + _, ok := podGroupManager.groupToJobId[groupName] if ok { - pgMgr.cancelFluxJob(groupName, pod) + podGroupManager.cancelFluxJob(groupName, pod) } else { - pgMgr.log.Info("[PodGroup DeletePod] Deleted pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) + podGroupManager.log.Info("[PodGroup DeletePod] Deleted pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) } } } diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index fe113d6..fddd3f0 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -50,9 +50,9 @@ type Fluence struct { mutex sync.Mutex client client.Client frameworkHandler framework.Handle - pgMgr fcore.Manager + podGroupManager fcore.Manager scheduleTimeout *time.Duration - pgBackoff *time.Duration + podGroupBackoff *time.Duration log *logger.DebugLogger } @@ -103,7 +103,7 @@ func New(_ context.Context, obj runtime.Object, handle framework.Handle) (framew // PermitWaitingTimeSeconds is the waiting timeout in seconds. scheduleTimeDuration := time.Duration(permitWaitingTimeSeconds) * time.Second - pgMgr := fcore.NewPodGroupManager( + podGroupManager := fcore.NewPodGroupManager( client, handle.SnapshotSharedLister(), &scheduleTimeDuration, @@ -112,20 +112,20 @@ func New(_ context.Context, obj runtime.Object, handle framework.Handle) (framew l, ) - // Event handlers to call on pgMgr + // Event handlers to call on podGroupManager fluxPodsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - UpdateFunc: pgMgr.UpdatePod, - DeleteFunc: pgMgr.DeletePod, + UpdateFunc: podGroupManager.UpdatePod, + DeleteFunc: podGroupManager.DeletePod, }) go fluxPodsInformer.Run(ctx.Done()) backoffSeconds := time.Duration(podGroupBackoffSeconds) * time.Second plugin := &Fluence{ frameworkHandler: handle, - pgMgr: pgMgr, + podGroupManager: podGroupManager, scheduleTimeout: &scheduleTimeDuration, log: l, - pgBackoff: &backoffSeconds, + podGroupBackoff: &backoffSeconds, } // TODO this is not supported yet @@ -144,10 +144,10 @@ func (f *Fluence) EventsToRegister() []framework.ClusterEventWithHint { // TODO I have not redone this yet, not sure what it does (it might replace our informer above) // To register a custom event, follow the naming convention at: // https://git.k8s.io/kubernetes/pkg/scheduler/eventhandlers.go#L403-L410 - pgGVK := fmt.Sprintf("podgroups.v1alpha1.%v", scheduling.GroupName) + podGroupGVK := fmt.Sprintf("podgroups.v1alpha1.%v", scheduling.GroupName) return []framework.ClusterEventWithHint{ {Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Add | framework.Delete}}, - {Event: framework.ClusterEvent{Resource: framework.GVK(pgGVK), ActionType: framework.Add | framework.Update | framework.Delete}}, + {Event: framework.ClusterEvent{Resource: framework.GVK(podGroupGVK), ActionType: framework.Add | framework.Update | framework.Delete}}, } } @@ -193,8 +193,8 @@ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { // which is what fluence needs to distinguish between namespaces. 
Just the // name could be replicated between different namespaces ctx := context.TODO() - name1, podGroup1 := f.pgMgr.GetPodGroup(ctx, podInfo1.Pod) - name2, podGroup2 := f.pgMgr.GetPodGroup(ctx, podInfo2.Pod) + name1, podGroup1 := f.podGroupManager.GetPodGroup(ctx, podInfo1.Pod) + name2, podGroup2 := f.podGroupManager.GetPodGroup(ctx, podInfo2.Pod) // Fluence can only compare if we have two known groups. // This tries for that first, and falls back to the initial attempt timestamp @@ -227,7 +227,7 @@ func (f *Fluence) PreFilter( // Quick check if the pod is already scheduled f.mutex.Lock() - node := f.pgMgr.GetPodNode(pod) + node := f.podGroupManager.GetPodNode(pod) f.mutex.Unlock() if node != "" { f.log.Info("[Fluence PreFilter] assigned pod %s to node %s\n", pod.Name, node) @@ -237,12 +237,12 @@ func (f *Fluence) PreFilter( f.log.Info("[Fluence PreFilter] pod %s does not have a node assigned\n", pod.Name) // This will populate the node name into the pod group manager - err := f.pgMgr.PreFilter(ctx, pod, state) + err := f.podGroupManager.PreFilter(ctx, pod, state) if err != nil { f.log.Error("[Fluence PreFilter] failed pod %s: %s", pod.Name, err.Error()) return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error()) } - node = f.pgMgr.GetPodNode(pod) + node = f.podGroupManager.GetPodNode(pod) result := framework.PreFilterResult{NodeNames: sets.New(node)} return &result, framework.NewStatus(framework.Success, "") } @@ -255,17 +255,17 @@ func (f *Fluence) PostFilter( filteredNodeStatusMap framework.NodeToStatusMap, ) (*framework.PostFilterResult, *framework.Status) { - pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) - if pg == nil { + groupName, podGroup := f.podGroupManager.GetPodGroup(ctx, pod) + if podGroup == nil { f.log.Info("Pod does not belong to any group, pod %s", pod.Name) return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, "can not find pod group") } // This explicitly checks nodes, and we can skip scheduling another pod if we already // have the minimum. For fluence since we expect an exact size this likely is not needed - assigned := f.pgMgr.CalculateAssignedPods(pg.Name, pod.Namespace) - if assigned >= int(pg.Spec.MinMember) { - f.log.Info("Assigned pods podGroup %s is assigned %s", pgName, assigned) + assigned := f.podGroupManager.CalculateAssignedPods(podGroup.Name, pod.Namespace) + if assigned >= int(podGroup.Spec.MinMember) { + f.log.Info("Assigned pods podGroup %s is assigned %s", groupName, assigned) return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable) } @@ -274,24 +274,24 @@ func (f *Fluence) PostFilter( // It's based on an implicit assumption: if the nth Pod failed, // it's inferrable other Pods belonging to the same PodGroup would be very likely to fail. 
f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { - if waitingPod.GetPod().Namespace == pod.Namespace && flabel.GetPodGroupLabel(waitingPod.GetPod()) == pg.Name { - f.log.Info("PostFilter rejects the pod for podGroup %s and pod %s", pgName, waitingPod.GetPod().Name) + if waitingPod.GetPod().Namespace == pod.Namespace && flabel.GetPodGroupLabel(waitingPod.GetPod()) == podGroup.Name { + f.log.Info("PostFilter rejects the pod for podGroup %s and pod %s", groupName, waitingPod.GetPod().Name) waitingPod.Reject(f.Name(), "optimistic rejection in PostFilter") } }) - if f.pgBackoff != nil { + if f.podGroupBackoff != nil { pods, err := f.frameworkHandler.SharedInformerFactory().Core().V1().Pods().Lister().Pods(pod.Namespace).List( labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: flabel.GetPodGroupLabel(pod)}), ) - if err == nil && len(pods) >= int(pg.Spec.MinMember) { - f.pgMgr.BackoffPodGroup(pgName, *f.pgBackoff) + if err == nil && len(pods) >= int(podGroup.Spec.MinMember) { + f.podGroupManager.BackoffPodGroup(groupName, *f.podGroupBackoff) } } - f.pgMgr.DeletePermittedPodGroup(pgName) + f.podGroupManager.DeletePermittedPodGroup(groupName) return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, - fmt.Sprintf("PodGroup %v gets rejected due to Pod %v is unschedulable even after PostFilter", pgName, pod.Name)) + fmt.Sprintf("PodGroup %v gets rejected due to Pod %v is unschedulable even after PostFilter", groupName, pod.Name)) } // Permit is the functions invoked by the framework at "Permit" extension point. @@ -304,7 +304,7 @@ func (f *Fluence) Permit( f.log.Info("Checking permit for pod %s to node %s", pod.Name, nodeName) waitTime := *f.scheduleTimeout - s := f.pgMgr.Permit(ctx, state, pod) + s := f.podGroupManager.Permit(ctx, state, pod) var retStatus *framework.Status switch s { case fcore.PodGroupNotSpecified: @@ -315,18 +315,18 @@ func (f *Fluence) Permit( return framework.NewStatus(framework.Unschedulable, "PodGroup not found"), 0 case fcore.Wait: f.log.Info("Pod %s is waiting to be scheduled to node %s", pod.Name, nodeName) - _, pg := f.pgMgr.GetPodGroup(ctx, pod) - if wait := fgroup.GetWaitTimeDuration(pg, f.scheduleTimeout); wait != 0 { + _, podGroup := f.podGroupManager.GetPodGroup(ctx, pod) + if wait := fgroup.GetWaitTimeDuration(podGroup, f.scheduleTimeout); wait != 0 { waitTime = wait } retStatus = framework.NewStatus(framework.Wait) // We will also request to move the sibling pods back to activeQ. - f.pgMgr.ActivateSiblings(pod, state) + f.podGroupManager.ActivateSiblings(pod, state) case fcore.Success: - pgFullName := flabel.GetPodGroupFullName(pod) + podGroupFullName := flabel.GetPodGroupFullName(pod) f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { - if flabel.GetPodGroupFullName(waitingPod.GetPod()) == pgFullName { + if flabel.GetPodGroupFullName(waitingPod.GetPod()) == podGroupFullName { f.log.Info("Permit allows pod %s", waitingPod.GetPod().Name) waitingPod.Allow(f.Name()) } @@ -346,15 +346,15 @@ func (f *Fluence) Reserve(ctx context.Context, state *framework.CycleState, pod // Unreserve rejects all other Pods in the PodGroup when one of the pods in the group times out. 
func (f *Fluence) Unreserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) { - pgName, pg := f.pgMgr.GetPodGroup(ctx, pod) - if pg == nil { + groupName, podGroup := f.podGroupManager.GetPodGroup(ctx, pod) + if podGroup == nil { return } f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { - if waitingPod.GetPod().Namespace == pod.Namespace && flabel.GetPodGroupLabel(waitingPod.GetPod()) == pg.Name { - f.log.Info("Unreserve rejects pod %s in group %s", waitingPod.GetPod().Name, pgName) + if waitingPod.GetPod().Namespace == pod.Namespace && flabel.GetPodGroupLabel(waitingPod.GetPod()) == podGroup.Name { + f.log.Info("Unreserve rejects pod %s in group %s", waitingPod.GetPod().Name, groupName) waitingPod.Reject(f.Name(), "rejection in Unreserve") } }) - f.pgMgr.DeletePermittedPodGroup(pgName) + f.podGroupManager.DeletePermittedPodGroup(groupName) } diff --git a/sig-scheduler-plugins/pkg/fluence/group/group.go b/sig-scheduler-plugins/pkg/fluence/group/group.go index dd039e3..2c3a3c1 100644 --- a/sig-scheduler-plugins/pkg/fluence/group/group.go +++ b/sig-scheduler-plugins/pkg/fluence/group/group.go @@ -31,18 +31,18 @@ func CreateFakeGroup(pod *corev1.Pod) *sched.PodGroup { // GetCreationTimestamp first tries the fluence group, then falls back to the initial attempt timestamp // This is the only update we have made to the upstream PodGroupManager, because we are expecting // a MicroTime and not a time.Time. -func GetCreationTimestamp(groupName string, pg *sched.PodGroup, podInfo *framework.QueuedPodInfo) metav1.MicroTime { +func GetCreationTimestamp(groupName string, podGroup *sched.PodGroup, podInfo *framework.QueuedPodInfo) metav1.MicroTime { // Don't try to get a time for a pod group that does not exist - if pg == nil { + if podGroup == nil { return metav1.NewMicroTime(*podInfo.InitialAttemptTimestamp) } // IsZero is an indicator if this was actually set // If the group label was present and we have a group, this will be true - if !pg.Status.ScheduleStartTime.IsZero() { - klog.Infof(" [Fluence] Pod group %s was created at %s\n", groupName, pg.Status.ScheduleStartTime) - return pg.Status.ScheduleStartTime + if !podGroup.Status.ScheduleStartTime.IsZero() { + klog.Infof(" [Fluence] Pod group %s was created at %s\n", groupName, podGroup.Status.ScheduleStartTime) + return podGroup.Status.ScheduleStartTime } // We should actually never get here. klog.Errorf(" [Fluence] Pod group %s time IsZero, we should not have reached here", groupName) @@ -50,12 +50,12 @@ func GetCreationTimestamp(groupName string, pg *sched.PodGroup, podInfo *framewo } // GetWaitTimeDuration returns a wait timeout based on the following precedences: -// 1. spec.scheduleTimeoutSeconds of the given pg, if specified +// 1. spec.scheduleTimeoutSeconds of the given podGroup, if specified // 2. given scheduleTimeout, if not nil // 3. 
fall back to DefaultWaitTime -func GetWaitTimeDuration(pg *sched.PodGroup, scheduleTimeout *time.Duration) time.Duration { - if pg != nil && pg.Spec.ScheduleTimeoutSeconds != nil { - return time.Duration(*pg.Spec.ScheduleTimeoutSeconds) * time.Second +func GetWaitTimeDuration(podGroup *sched.PodGroup, scheduleTimeout *time.Duration) time.Duration { + if podGroup != nil && podGroup.Spec.ScheduleTimeoutSeconds != nil { + return time.Duration(*podGroup.Spec.ScheduleTimeoutSeconds) * time.Second } if scheduleTimeout != nil && *scheduleTimeout != 0 { return *scheduleTimeout diff --git a/sig-scheduler-plugins/pkg/fluence/labels/labels.go b/sig-scheduler-plugins/pkg/fluence/labels/labels.go index f955d67..eb96c72 100644 --- a/sig-scheduler-plugins/pkg/fluence/labels/labels.go +++ b/sig-scheduler-plugins/pkg/fluence/labels/labels.go @@ -46,11 +46,11 @@ func GetPodGroupLabel(pod *v1.Pod) string { // GetPodGroupFullName get namespaced group name from pod labels func GetPodGroupFullName(pod *v1.Pod) string { - pgName := GetPodGroupLabel(pod) - if len(pgName) == 0 { + groupName := GetPodGroupLabel(pod) + if len(groupName) == 0 { return "" } - return fmt.Sprintf("%v/%v", pod.Namespace, pgName) + return fmt.Sprintf("%v/%v", pod.Namespace, groupName) } // GetPodGroupSize gets the pod group size from the label From cbeffceb04502a22396da984f620e8f9cd9ff99a Mon Sep 17 00:00:00 2001 From: vsoch Date: Tue, 14 May 2024 00:27:30 -0600 Subject: [PATCH 28/28] fix: response to review comments This set of changes includes the following: 1. Renaming short variable names to be longer and more understandable. 2. Not using the Status.ScheduleStartTime for the pod start time, but instead adding a new field. This previous field was there for a different purpose. 3. Creating named identifiers for resource types that can be shared in the jgf module along with others that use the same relations / vertex types. 4. Removing comments that are not necessary. 5. Changing JGF types that warrant it from int to int64. 6. Fixing spelling mistakes, etc. 7. Removing the need to write the jobspec to a temporary file (we just need the string). The JGF and utils modules need some additional review - specifically, I am worried that the paths->containment is not set, and sometimes the name reflects the index of the overall graph (global) and other times the index of the resource type. I think we likely want the latter for the inner name, but I am not sure in practice that fluxion is using it (internally). I am pushing these changes to assess testing, etc., and will update the PR as needed. There could also have been changes to upstream since the PR was opened that warrant additional fixes. 
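(Editorial aside on item 2, not part of the patch itself: the point of a microsecond-granularity creation stamp is that pod groups created within the same second still need a deterministic queue order. Below is a minimal Go sketch of that comparison, assuming only the metav1.MicroTime type from apimachinery; the helper name earlierGroup and its arguments are hypothetical and only illustrate the ordering idea.)

```go
package sketch

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// earlierGroup reports whether group a should be ordered before group b.
// MicroTime keeps groups created within the same second in a stable order;
// the name comparison breaks exact timestamp ties deterministically.
func earlierGroup(aName, bName string, aCreated, bCreated metav1.MicroTime) bool {
	if aCreated.Equal(&bCreated) {
		return aName < bName
	}
	return aCreated.Before(&bCreated)
}
```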
Signed-off-by: vsoch --- README.md | 4 +- .../scheduling/v1alpha1/podgroup_webhook.go | 49 ++++--- .../apis/scheduling/v1alpha1/types.go | 12 +- .../cmd/controller/app/server.go | 4 - .../templates/webhook-service.yaml | 2 +- .../pkg/controllers/podgroup_controller.go | 10 +- .../pkg/fluence/core/core.go | 30 +++-- .../pkg/fluence/core/flux.go | 34 ++--- sig-scheduler-plugins/pkg/fluence/fluence.go | 103 ++++++++------- sig-scheduler-plugins/pkg/fluence/register.go | 10 +- .../pkg/fluence/utils/utils.go | 22 ++-- src/Makefile | 4 +- src/fluence/cmd/main.go | 8 +- src/fluence/fluxion/fluxion.go | 50 ++----- src/fluence/jgf/jgf.go | 123 +++++++++--------- src/fluence/jgf/types.go | 10 +- src/fluence/jobspec/jobspec.go | 37 +----- src/fluence/utils/utils.go | 95 +++++--------- 18 files changed, 269 insertions(+), 338 deletions(-) diff --git a/README.md b/README.md index 515d313..300eb1d 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ The way it works: 1. We have a mutating admission webhook that looks for jobs and pods, and ensures there are fluence labels (likely we will add more abstractions). 2. A PodGroup reconciler is watching for these same objects. When they are created: a. We find the labels and create the pod group object. - b. The pod group object has a timestamp for creation in milliseconds. + b. The pod group object has a timestamp for creation in microseconds. 3. When the pod is then given to fluence for scheduling, it already has the PodGroup created with name/size and can properly sort. Here is an example of a Job intended for Fluence: @@ -452,7 +452,7 @@ If you are looking to develop: - [src](src): includes source code for fluence. You'll find logs for this code in the `sidecar` container of the fluence pod. - [sig-scheduler-plugins](sig-scheduler-plugins): includes assets (manifests and Go files) that are intended to be added to the kubernetes-sigs/scheduler-plugins upstream repository before build. You'll find logs for this container in the `scheduler-plugins-scheduler` container of the pod. - - [apis](sig-scheduler-plugins/apis): customized PodGroup to define the status scheduled time in micro seconds + - [apis](sig-scheduler-plugins/apis): customized PodGroup to define the status scheduled time in microseconds - [manifests](sig-scheduler-plugins/manifests): manifests for helm and Kubernetes - [pkg](sig-scheduler-plugins/pkg): the main fluence module to add to upstream - [cmd](sig-scheduler-plugins/cmd): the main.go to replace in upstream diff --git a/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go index c2582f9..7266d85 100644 --- a/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go +++ b/sig-scheduler-plugins/apis/scheduling/v1alpha1/podgroup_webhook.go @@ -1,5 +1,5 @@ /* -Copyright 2023 Lawrence Livermore National Security, LLC +Copyright 2024 Lawrence Livermore National Security, LLC (c.f. AUTHORS, NOTICE.LLNS, COPYING) SPDX-License-Identifier: MIT @@ -50,14 +50,14 @@ type fluenceWatcher struct { // Handle is the main handler for the webhook, which is looking for jobs and pods (in that order) // If a job comes in (with a pod template) first, we add the labels there first (and they will // not be added again). 
-func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admission.Response { +func (hook *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admission.Response { logger.Info("Running webhook handle, determining pod wrapper abstraction...") job := &batchv1.Job{} - err := a.decoder.Decode(req, job) + err := hook.decoder.Decode(req, job) if err == nil { - err = a.EnsureGroupOnJob(job) + err = hook.EnsureGroupOnJob(job) if err != nil { logger.Error(err, "Issue adding PodGroup to Job") return admission.Errored(http.StatusBadRequest, err) @@ -72,9 +72,9 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi } pod := &corev1.Pod{} - err = a.decoder.Decode(req, pod) + err = hook.decoder.Decode(req, pod) if err == nil { - err = a.EnsureGroup(pod) + err = hook.EnsureGroup(pod) if err != nil { logger.Error(err, "Issue adding PodGroup to Pod") return admission.Errored(http.StatusBadRequest, err) @@ -89,9 +89,9 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi } set := &appsv1.StatefulSet{} - err = a.decoder.Decode(req, set) + err = hook.decoder.Decode(req, set) if err == nil { - err = a.EnsureGroupStatefulSet(set) + err = hook.EnsureGroupStatefulSet(set) if err != nil { logger.Error(err, "Issue adding PodGroup to StatefulSet") return admission.Errored(http.StatusBadRequest, err) @@ -105,15 +105,15 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi return admission.PatchResponseFromRaw(req.Object.Raw, marshalledSet) } - d := &appsv1.Deployment{} - err = a.decoder.Decode(req, d) + deployment := &appsv1.Deployment{} + err = hook.decoder.Decode(req, deployment) if err == nil { - err = a.EnsureGroupDeployment(d) + err = hook.EnsureGroupDeployment(deployment) if err != nil { logger.Error(err, "Issue adding PodGroup to Deployment") return admission.Errored(http.StatusBadRequest, err) } - marshalledD, err := json.Marshal(d) + marshalledD, err := json.Marshal(deployment) if err != nil { logger.Error(err, "Marshalling Deployment error") return admission.Errored(http.StatusInternalServerError, err) @@ -123,9 +123,9 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi } rset := &appsv1.ReplicaSet{} - err = a.decoder.Decode(req, rset) + err = hook.decoder.Decode(req, rset) if err == nil { - err = a.EnsureGroupReplicaSet(rset) + err = hook.EnsureGroupReplicaSet(rset) if err != nil { logger.Error(err, "Issue adding PodGroup to ReplicaSet") return admission.Errored(http.StatusBadRequest, err) @@ -145,29 +145,28 @@ func (a *fluenceWatcher) Handle(ctx context.Context, req admission.Request) admi } // Default is the expected entrypoint for a webhook... -// I don't remember if this is even called... 
-func (a *fluenceWatcher) Default(ctx context.Context, obj runtime.Object) error { +func (hook *fluenceWatcher) Default(ctx context.Context, obj runtime.Object) error { switch obj.(type) { case *batchv1.Job: job := obj.(*batchv1.Job) - return a.EnsureGroupOnJob(job) + return hook.EnsureGroupOnJob(job) case *corev1.Pod: pod := obj.(*corev1.Pod) - return a.EnsureGroup(pod) + return hook.EnsureGroup(pod) case *appsv1.StatefulSet: set := obj.(*appsv1.StatefulSet) - return a.EnsureGroupStatefulSet(set) + return hook.EnsureGroupStatefulSet(set) case *appsv1.Deployment: - d := obj.(*appsv1.Deployment) - return a.EnsureGroupDeployment(d) + deployment := obj.(*appsv1.Deployment) + return hook.EnsureGroupDeployment(deployment) case *appsv1.ReplicaSet: set := obj.(*appsv1.ReplicaSet) - return a.EnsureGroupReplicaSet(set) + return hook.EnsureGroupReplicaSet(set) default: // no match @@ -180,7 +179,7 @@ func (a *fluenceWatcher) Default(ctx context.Context, obj runtime.Object) error // Note that we need to do similar for Job. // A pod without a job wrapper, and without metadata is a group // of size 1. -func (a *fluenceWatcher) EnsureGroup(pod *corev1.Pod) error { +func (hook *fluenceWatcher) EnsureGroup(pod *corev1.Pod) error { // Add labels if we don't have anything. Everything is a group! if pod.Labels == nil { @@ -221,7 +220,7 @@ func getJobLabel(job *batchv1.Job, labelName, defaultLabel string) string { // EnsureGroupOnJob looks for fluence labels (size and name) on both the job // and the pod template. We ultimately put on the pod, the lowest level unit. -// Since we have the size of the job (paramllism) we can use that for the size +// Since we have the size of the job (parallelism) we can use that for the size func (a *fluenceWatcher) EnsureGroupOnJob(job *batchv1.Job) error { // Be forgiving - allow the person to specify it on the job directly or on the Podtemplate @@ -252,7 +251,7 @@ func (a *fluenceWatcher) EnsureGroupOnJob(job *batchv1.Job) error { } // EnsureGroupStatefulSet creates a PodGroup for a StatefulSet -func (a *fluenceWatcher) EnsureGroupStatefulSet(set *appsv1.StatefulSet) error { +func (hook *fluenceWatcher) EnsureGroupStatefulSet(set *appsv1.StatefulSet) error { // StatefulSet requires on top level explicitly if set.Labels == nil { diff --git a/sig-scheduler-plugins/apis/scheduling/v1alpha1/types.go b/sig-scheduler-plugins/apis/scheduling/v1alpha1/types.go index 77f10f3..fca7854 100644 --- a/sig-scheduler-plugins/apis/scheduling/v1alpha1/types.go +++ b/sig-scheduler-plugins/apis/scheduling/v1alpha1/types.go @@ -136,12 +136,12 @@ type PodGroup struct { type PodGroupSpec struct { // MinMember defines the minimal number of members/tasks to run the pod group; // if there's not enough resources to start all tasks, the scheduler - // will not start anyone. + // will not start any. MinMember int32 `json:"minMember,omitempty"` // MinResources defines the minimal resource of members/tasks to run the pod group; // if there's not enough resources to start all tasks, the scheduler - // will not start anyone. + // will not start any. MinResources v1.ResourceList `json:"minResources,omitempty"` // ScheduleTimeoutSeconds defines the maximal time of members/tasks to wait before run the pod group; @@ -169,7 +169,13 @@ type PodGroupStatus struct { // +optional Failed int32 `json:"failed,omitempty"` - // ScheduleStartTime of the group (note that we changed this to a micro time) + // CreationTime is intended to mock the object CreationTime, + // but set by us to be MicroTime instead of Time. 
+ // +optional + CreationTime metav1.MicroTime `json:"creationTime,omitempty"` + + // ScheduleStartTime of the group is when we want to start counting + // "at time N plus 48 hours, this is when we deem time waited is too long" // +optional ScheduleStartTime metav1.MicroTime `json:"scheduleStartTime,omitempty"` } diff --git a/sig-scheduler-plugins/cmd/controller/app/server.go b/sig-scheduler-plugins/cmd/controller/app/server.go index aae8625..c10968e 100644 --- a/sig-scheduler-plugins/cmd/controller/app/server.go +++ b/sig-scheduler-plugins/cmd/controller/app/server.go @@ -65,10 +65,6 @@ func Run(s *ServerRunOptions) error { return err } - // Create a channel for the mutating webhook to communicate back to the reconciler - // This way we create the PodGroup before scheduling - //c := make(chan event.GenericEvent) - if err = (&controllers.PodGroupReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), diff --git a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/webhook-service.yaml b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/webhook-service.yaml index bedfb95..e5339a1 100644 --- a/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/webhook-service.yaml +++ b/sig-scheduler-plugins/manifests/install/charts/as-a-second-scheduler/templates/webhook-service.yaml @@ -12,4 +12,4 @@ spec: selector: app: scheduler-plugins-controller ports: - {{- .Values.webhookService.ports | toYaml | nindent 2 -}} + {{- .Values.webhookService.ports | toYaml | nindent 2 -}} diff --git a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go index a2fd4a6..7afb815 100644 --- a/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go +++ b/sig-scheduler-plugins/pkg/controllers/podgroup_controller.go @@ -83,6 +83,7 @@ func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c log.Info("REFERENCES", "Reconciler", pg.ObjectMeta.OwnerReferences) // Grab all statuses (and groups of them) we are interested in + // Note that 48 hours seems arbitrary, and if it is, we might make it a variable schedulingOrPending := (pg.Status.Phase == schedv1alpha1.PodGroupScheduling || pg.Status.Phase == schedv1alpha1.PodGroupPending) twoDaysOld := pg.Status.ScheduleStartTime.Sub(pg.CreationTimestamp.Time) > 48*time.Hour finishedOrFailed := pg.Status.Phase == schedv1alpha1.PodGroupFinished || pg.Status.Phase == schedv1alpha1.PodGroupFailed @@ -111,8 +112,11 @@ func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c return ctrl.Result{}, err } - // If the scheduler time created is Zero (not set) we set it here - if pg.Status.ScheduleStartTime.IsZero() { + // If the pod group creation time created is Zero (not set) we set it here + // This only happens on the first reconcile, which should also be when the + // pod group is created. We set it here and don't use the underlying object + // CreationTime because we need to change the granularity to ms. 
+ if pg.Status.CreationTime.IsZero() { return r.setTimeCreated(ctx, pg, podList.Items, timestamp) } @@ -159,7 +163,7 @@ func (r *PodGroupReconciler) setTimeCreated( // Now patch to update it patch := client.MergeFrom(pg.DeepCopy()) - pg.Status.ScheduleStartTime = timestamp + pg.Status.CreationTime = timestamp // Apply the patch to update the size r.Status().Update(ctx, pg) diff --git a/sig-scheduler-plugins/pkg/fluence/core/core.go b/sig-scheduler-plugins/pkg/fluence/core/core.go index a74e749..9de5a26 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/core.go +++ b/sig-scheduler-plugins/pkg/fluence/core/core.go @@ -78,7 +78,7 @@ type Manager interface { PreFilter(context.Context, *corev1.Pod, *framework.CycleState) error GetPodNode(*corev1.Pod) string GetPodGroup(context.Context, *corev1.Pod) (string, *v1alpha1.PodGroup) - GetCreationTimestamp(*corev1.Pod, time.Time) time.Time + GetCreationTimestamp(*corev1.Pod, time.Time) metav1.MicroTime DeletePermittedPodGroup(string) Permit(context.Context, *framework.CycleState, *corev1.Pod) Status CalculateAssignedPods(string, string) int @@ -255,8 +255,8 @@ func (podGroupManager *PodGroupManager) PreFilter( return nil } - _, exist := podGroupManager.backedOffpodGroup.Get(groupName) - if exist { + _, exists := podGroupManager.backedOffpodGroup.Get(groupName) + if exists { return fmt.Errorf("podGroup %v failed recently", groupName) } @@ -290,8 +290,8 @@ func (podGroupManager *PodGroupManager) PreFilter( // TODO(cwdsuzhou): This resource check may not always pre-catch unschedulable pod group. // It only tries to PreFilter resource constraints so even if a PodGroup passed here, // it may not necessarily pass Filter due to other constraints such as affinity/taints. - _, ok := podGroupManager.permittedpodGroup.Get(groupName) - if ok { + _, exists = podGroupManager.permittedpodGroup.Get(groupName) + if exists { podGroupManager.log.Info("[PodGroup PreFilter] Pod Group %s is already admitted", groupName) return nil } @@ -331,17 +331,27 @@ func (podGroupManager *PodGroupManager) PreFilter( return nil } -// GetCreationTimestamp returns the creation time of a podGroup or a pod. -func (podGroupManager *PodGroupManager) GetCreationTimestamp(pod *corev1.Pod, ts time.Time) time.Time { +// GetCreationTimestamp returns the creation time of a podGroup or a pod in microseconds (metav1.MicroTime) +// The Status.CreationTime is set by the PodGroup reconciler, which has to happen before we have +// a PodGroup. I don't see cases when this wouldn't happen, but in case we fall back to +// converting the pg.CreationTime to a MicroTime +func (podGroupManager *PodGroupManager) GetCreationTimestamp(pod *corev1.Pod, ts time.Time) metav1.MicroTime { groupName := util.GetPodGroupLabel(pod) if len(groupName) == 0 { - return ts + return metav1.NewMicroTime(ts) } var podGroup v1alpha1.PodGroup if err := podGroupManager.client.Get(context.TODO(), types.NamespacedName{Namespace: pod.Namespace, Name: groupName}, &podGroup); err != nil { - return ts + return metav1.NewMicroTime(ts) } - return podGroup.CreationTimestamp.Time + // First preference goes to microseconds. This should be set, as it is set by the first + // reconcile, and we wouldn't have a pod group if it didn't pass through that. 
+ if !podGroup.Status.CreationTime.IsZero() { + return podGroup.Status.CreationTime + } + // Fall back to CreationTime from Kubernetes, in seconds + // In practice this should not happen + return metav1.NewMicroTime(podGroup.CreationTimestamp.Time) } // CalculateAssignedPods returns the number of pods that has been assigned nodes: assumed or bound. diff --git a/sig-scheduler-plugins/pkg/fluence/core/flux.go b/sig-scheduler-plugins/pkg/fluence/core/flux.go index 50c8ff1..24c9212 100644 --- a/sig-scheduler-plugins/pkg/fluence/core/flux.go +++ b/sig-scheduler-plugins/pkg/fluence/core/flux.go @@ -69,21 +69,21 @@ func (podGroupManager *PodGroupManager) AskFlux( } // An error here is an error with making the request - r, err := grpcclient.Match(context.Background(), request) + response, err := grpcclient.Match(context.Background(), request) if err != nil { podGroupManager.log.Warning("[PodGroup AskFlux] did not receive any match response: %v\n", err) return nodes, err } // TODO GetPodID should be renamed, because it will reflect the group - podGroupManager.log.Info("[PodGroup AskFlux] Match response ID %s\n", r.GetPodID()) + podGroupManager.log.Info("[PodGroup AskFlux] Match response ID %s\n", response.GetPodID()) // Get the nodelist and inspect - nodelist := r.GetNodelist() + nodelist := response.GetNodelist() for _, node := range nodelist { nodes = append(nodes, node.NodeID) } - jobid := uint64(r.GetJobID()) + jobid := uint64(response.GetJobID()) podGroupManager.log.Info("[PodGroup AskFlux] parsed node pods list %s for job id %d\n", nodes, jobid) // TODO would be nice to actually be able to ask flux jobs -a to fluence @@ -98,10 +98,10 @@ func (podGroupManager *PodGroupManager) AskFlux( // We assume that the cancelled job also means deleting the pod group func (podGroupManager *PodGroupManager) cancelFluxJob(groupName string, pod *corev1.Pod) error { - jobid, ok := podGroupManager.groupToJobId[groupName] + jobid, exists := podGroupManager.groupToJobId[groupName] // The job was already cancelled by another pod - if !ok { + if !exists { podGroupManager.log.Info("[PodGroup cancelFluxJob] Request for cancel of group %s is already complete.", groupName) return nil } @@ -121,15 +121,15 @@ func (podGroupManager *PodGroupManager) cancelFluxJob(groupName string, pod *cor // This error reflects the success or failure of the cancel request request := &pb.CancelRequest{JobID: int64(jobid)} - res, err := grpcclient.Cancel(context.Background(), request) + response, err := grpcclient.Cancel(context.Background(), request) if err != nil { podGroupManager.log.Error("[PodGroup cancelFluxJob] did not receive any cancel response: %v", err) return err } - podGroupManager.log.Info("[PodGroup cancelFluxJob] Job cancellation for group %s result: %d", groupName, res.Error) + podGroupManager.log.Info("[PodGroup cancelFluxJob] Job cancellation for group %s result: %d", groupName, response.Error) // And this error is if the cancel was successful or not - if res.Error == 0 { + if response.Error == 0 { podGroupManager.log.Info("[PodGroup cancelFluxJob] Successful cancel of flux job: %d for group %s", jobid, groupName) podGroupManager.cleanup(pod, groupName) } else { @@ -189,8 +189,8 @@ func (podGroupManager *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { // Do we have the group id in our cache? 
If yes, we haven't deleted the jobid yet // I am worried here that if some pods are succeeded and others pending, this could // be a mistake - fluence would schedule it again - _, ok := podGroupManager.groupToJobId[groupName] - if ok { + _, exists := podGroupManager.groupToJobId[groupName] + if exists { podGroupManager.cancelFluxJob(groupName, oldPod) } else { podGroupManager.log.Verbose("[PodGroup UpdatePod] Succeeded pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) @@ -204,8 +204,8 @@ func (podGroupManager *PodGroupManager) UpdatePod(oldObj, newObj interface{}) { podGroupManager.mutex.Lock() defer podGroupManager.mutex.Unlock() - _, ok := podGroupManager.groupToJobId[groupName] - if ok { + _, exists := podGroupManager.groupToJobId[groupName] + if exists { podGroupManager.cancelFluxJob(groupName, oldPod) } else { podGroupManager.log.Error("[PodGroup UpdatePod] Failed pod %s/%s in group %s doesn't have flux jobid", newPod.Namespace, newPod.Name, groupName) @@ -237,8 +237,8 @@ func (podGroupManager *PodGroupManager) DeletePod(podObj interface{}) { podGroupManager.mutex.Lock() defer podGroupManager.mutex.Unlock() - _, ok := podGroupManager.groupToJobId[groupName] - if ok { + _, exists := podGroupManager.groupToJobId[groupName] + if exists { podGroupManager.cancelFluxJob(groupName, pod) } else { podGroupManager.log.Info("[PodGroup DeletePod] Terminating pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) @@ -247,8 +247,8 @@ func (podGroupManager *PodGroupManager) DeletePod(podObj interface{}) { podGroupManager.mutex.Lock() defer podGroupManager.mutex.Unlock() - _, ok := podGroupManager.groupToJobId[groupName] - if ok { + _, exists := podGroupManager.groupToJobId[groupName] + if exists { podGroupManager.cancelFluxJob(groupName, pod) } else { podGroupManager.log.Info("[PodGroup DeletePod] Deleted pod %s/%s in group %s doesn't have flux jobid", pod.Namespace, pod.Name, groupName) diff --git a/sig-scheduler-plugins/pkg/fluence/fluence.go b/sig-scheduler-plugins/pkg/fluence/fluence.go index fddd3f0..44f0349 100644 --- a/sig-scheduler-plugins/pkg/fluence/fluence.go +++ b/sig-scheduler-plugins/pkg/fluence/fluence.go @@ -134,14 +134,13 @@ func New(_ context.Context, obj runtime.Object, handle framework.Handle) (framew return plugin, err } -func (f *Fluence) Name() string { +func (fluence *Fluence) Name() string { return Name } // Fluence has added delete, although I wonder if update includes that signal // and it's redundant? 
-func (f *Fluence) EventsToRegister() []framework.ClusterEventWithHint { - // TODO I have not redone this yet, not sure what it does (it might replace our informer above) +func (fluence *Fluence) EventsToRegister() []framework.ClusterEventWithHint { // To register a custom event, follow the naming convention at: // https://git.k8s.io/kubernetes/pkg/scheduler/eventhandlers.go#L403-L410 podGroupGVK := fmt.Sprintf("podgroups.v1alpha1.%v", scheduling.GroupName) @@ -152,14 +151,14 @@ func (f *Fluence) EventsToRegister() []framework.ClusterEventWithHint { } // TODO we need to account for affinity here -func (f *Fluence) Filter( +func (fluence *Fluence) Filter( ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeInfo *framework.NodeInfo, ) *framework.Status { - f.log.Verbose("[Fluence Filter] Filtering input node %s", nodeInfo.Node().Name) + fluence.log.Verbose("[Fluence Filter] Filtering input node %s", nodeInfo.Node().Name) state, err := cycleState.Read(framework.StateKey(pod.Name)) // No error means we retrieved the state @@ -172,7 +171,7 @@ func (f *Fluence) Filter( if ok && value.NodeName != nodeInfo.Node().Name { return framework.NewStatus(framework.Unschedulable, "pod is not permitted") } else { - f.log.Info("[Fluence Filter] node %s selected for %s\n", value.NodeName, pod.Name) + fluence.log.Info("[Fluence Filter] node %s selected for %s\n", value.NodeName, pod.Name) } } return framework.NewStatus(framework.Success) @@ -182,7 +181,7 @@ func (f *Fluence) Filter( // 1. Compare the priorities of Pods. // 2. Compare the initialization timestamps of PodGroups or Pods. // 3. Compare the keys of PodGroups/Pods: /. -func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { +func (fluence *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { prio1 := corev1helpers.PodPriority(podInfo1.Pod) prio2 := corev1helpers.PodPriority(podInfo2.Pod) if prio1 != prio2 { @@ -193,8 +192,8 @@ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { // which is what fluence needs to distinguish between namespaces. Just the // name could be replicated between different namespaces ctx := context.TODO() - name1, podGroup1 := f.podGroupManager.GetPodGroup(ctx, podInfo1.Pod) - name2, podGroup2 := f.podGroupManager.GetPodGroup(ctx, podInfo2.Pod) + name1, podGroup1 := fluence.podGroupManager.GetPodGroup(ctx, podInfo1.Pod) + name2, podGroup2 := fluence.podGroupManager.GetPodGroup(ctx, podInfo2.Pod) // Fluence can only compare if we have two known groups. // This tries for that first, and falls back to the initial attempt timestamp @@ -212,60 +211,60 @@ func (f *Fluence) Less(podInfo1, podInfo2 *framework.QueuedPodInfo) bool { // PreFilterExtensions allow for callbacks on filtered states // This is required to be defined for a PreFilter plugin // https://github.com/kubernetes/kubernetes/blob/master/pkg/scheduler/framework/interface.go#L383 -func (f *Fluence) PreFilterExtensions() framework.PreFilterExtensions { +func (fluence *Fluence) PreFilterExtensions() framework.PreFilterExtensions { return nil } // PreFilter performs the following validations. // 1. Whether the PodGroup that the Pod belongs to is on the deny list. // 2. Whether the total number of pods in a PodGroup is less than its `minMember`. 
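The ordering that Less documents earlier in this file's diff (priority first, then the PodGroup or Pod creation time, then the namespace/name key) reduces to a three step comparator. A toy version for reference; queuedPod and less are illustrative stand-ins, not the plugin's types:

```go
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// queuedPod is a stand-in for framework.QueuedPodInfo, holding only the three
// fields the documented ordering needs.
type queuedPod struct {
	priority int32
	created  metav1.MicroTime
	key      string // "<namespace>/<name>" of the pod group (or pod)
}

// less mirrors the documented rule: higher priority first, then the earlier
// creation time, then the lexically smaller key as the final tie breaker.
func less(a, b queuedPod) bool {
	if a.priority != b.priority {
		return a.priority > b.priority
	}
	if !a.created.Equal(&b.created) {
		return a.created.Before(&b.created)
	}
	return a.key < b.key
}

func main() {
	t0 := metav1.NowMicro()
	fmt.Println(less(
		queuedPod{priority: 10, created: t0, key: "default/group-a"},
		queuedPod{priority: 5, created: t0, key: "default/group-b"},
	)) // true: priority wins before timestamps are even consulted
}
```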
-func (f *Fluence) PreFilter( +func (fluence *Fluence) PreFilter( ctx context.Context, state *framework.CycleState, pod *corev1.Pod, ) (*framework.PreFilterResult, *framework.Status) { // Quick check if the pod is already scheduled - f.mutex.Lock() - node := f.podGroupManager.GetPodNode(pod) - f.mutex.Unlock() + fluence.mutex.Lock() + node := fluence.podGroupManager.GetPodNode(pod) + fluence.mutex.Unlock() if node != "" { - f.log.Info("[Fluence PreFilter] assigned pod %s to node %s\n", pod.Name, node) + fluence.log.Info("[Fluence PreFilter] assigned pod %s to node %s\n", pod.Name, node) result := framework.PreFilterResult{NodeNames: sets.New(node)} return &result, framework.NewStatus(framework.Success, "") } - f.log.Info("[Fluence PreFilter] pod %s does not have a node assigned\n", pod.Name) + fluence.log.Info("[Fluence PreFilter] pod %s does not have a node assigned\n", pod.Name) // This will populate the node name into the pod group manager - err := f.podGroupManager.PreFilter(ctx, pod, state) + err := fluence.podGroupManager.PreFilter(ctx, pod, state) if err != nil { - f.log.Error("[Fluence PreFilter] failed pod %s: %s", pod.Name, err.Error()) + fluence.log.Error("[Fluence PreFilter] failed pod %s: %s", pod.Name, err.Error()) return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error()) } - node = f.podGroupManager.GetPodNode(pod) + node = fluence.podGroupManager.GetPodNode(pod) result := framework.PreFilterResult{NodeNames: sets.New(node)} return &result, framework.NewStatus(framework.Success, "") } // PostFilter is used to reject a group of pods if a pod does not pass PreFilter or Filter. -func (f *Fluence) PostFilter( +func (fluence *Fluence) PostFilter( ctx context.Context, state *framework.CycleState, pod *corev1.Pod, filteredNodeStatusMap framework.NodeToStatusMap, ) (*framework.PostFilterResult, *framework.Status) { - groupName, podGroup := f.podGroupManager.GetPodGroup(ctx, pod) + groupName, podGroup := fluence.podGroupManager.GetPodGroup(ctx, pod) if podGroup == nil { - f.log.Info("Pod does not belong to any group, pod %s", pod.Name) + fluence.log.Info("Pod does not belong to any group, pod %s", pod.Name) return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, "can not find pod group") } // This explicitly checks nodes, and we can skip scheduling another pod if we already // have the minimum. For fluence since we expect an exact size this likely is not needed - assigned := f.podGroupManager.CalculateAssignedPods(podGroup.Name, pod.Namespace) + assigned := fluence.podGroupManager.CalculateAssignedPods(podGroup.Name, pod.Namespace) if assigned >= int(podGroup.Spec.MinMember) { - f.log.Info("Assigned pods podGroup %s is assigned %s", groupName, assigned) + fluence.log.Info("Assigned pods podGroup %s is assigned %s", groupName, assigned) return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable) } @@ -273,65 +272,65 @@ func (f *Fluence) PostFilter( // It's based on an implicit assumption: if the nth Pod failed, // it's inferrable other Pods belonging to the same PodGroup would be very likely to fail. 
- f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { + fluence.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { if waitingPod.GetPod().Namespace == pod.Namespace && flabel.GetPodGroupLabel(waitingPod.GetPod()) == podGroup.Name { - f.log.Info("PostFilter rejects the pod for podGroup %s and pod %s", groupName, waitingPod.GetPod().Name) - waitingPod.Reject(f.Name(), "optimistic rejection in PostFilter") + fluence.log.Info("PostFilter rejects the pod for podGroup %s and pod %s", groupName, waitingPod.GetPod().Name) + waitingPod.Reject(fluence.Name(), "optimistic rejection in PostFilter") } }) - if f.podGroupBackoff != nil { - pods, err := f.frameworkHandler.SharedInformerFactory().Core().V1().Pods().Lister().Pods(pod.Namespace).List( + if fluence.podGroupBackoff != nil { + pods, err := fluence.frameworkHandler.SharedInformerFactory().Core().V1().Pods().Lister().Pods(pod.Namespace).List( labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: flabel.GetPodGroupLabel(pod)}), ) if err == nil && len(pods) >= int(podGroup.Spec.MinMember) { - f.podGroupManager.BackoffPodGroup(groupName, *f.podGroupBackoff) + fluence.podGroupManager.BackoffPodGroup(groupName, *fluence.podGroupBackoff) } } - f.podGroupManager.DeletePermittedPodGroup(groupName) + fluence.podGroupManager.DeletePermittedPodGroup(groupName) return &framework.PostFilterResult{}, framework.NewStatus(framework.Unschedulable, fmt.Sprintf("PodGroup %v gets rejected due to Pod %v is unschedulable even after PostFilter", groupName, pod.Name)) } // Permit is the functions invoked by the framework at "Permit" extension point. -func (f *Fluence) Permit( +func (fluence *Fluence) Permit( ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string, ) (*framework.Status, time.Duration) { - f.log.Info("Checking permit for pod %s to node %s", pod.Name, nodeName) - waitTime := *f.scheduleTimeout - s := f.podGroupManager.Permit(ctx, state, pod) + fluence.log.Info("Checking permit for pod %s to node %s", pod.Name, nodeName) + waitTime := *fluence.scheduleTimeout + s := fluence.podGroupManager.Permit(ctx, state, pod) var retStatus *framework.Status switch s { case fcore.PodGroupNotSpecified: - f.log.Info("Checking permit for pod %s to node %s: PodGroupNotSpecified", pod.Name, nodeName) + fluence.log.Info("Checking permit for pod %s to node %s: PodGroupNotSpecified", pod.Name, nodeName) return framework.NewStatus(framework.Success, ""), 0 case fcore.PodGroupNotFound: - f.log.Info("Checking permit for pod %s to node %s: PodGroupNotFound", pod.Name, nodeName) + fluence.log.Info("Checking permit for pod %s to node %s: PodGroupNotFound", pod.Name, nodeName) return framework.NewStatus(framework.Unschedulable, "PodGroup not found"), 0 case fcore.Wait: - f.log.Info("Pod %s is waiting to be scheduled to node %s", pod.Name, nodeName) - _, podGroup := f.podGroupManager.GetPodGroup(ctx, pod) - if wait := fgroup.GetWaitTimeDuration(podGroup, f.scheduleTimeout); wait != 0 { + fluence.log.Info("Pod %s is waiting to be scheduled to node %s", pod.Name, nodeName) + _, podGroup := fluence.podGroupManager.GetPodGroup(ctx, pod) + if wait := fgroup.GetWaitTimeDuration(podGroup, fluence.scheduleTimeout); wait != 0 { waitTime = wait } retStatus = framework.NewStatus(framework.Wait) // We will also request to move the sibling pods back to activeQ. 
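The Wait branch above is the gang scheduling handshake: early pods in a group park at Permit with a timeout, and the pod that completes the group releases its waiting siblings (the fcore.Success branch below). A toy model of that flow, not the framework API; the group name is a placeholder:

```go
package main

import "fmt"

// waitingRoom is a toy stand-in for the framework's registry of waiting pods.
type waitingRoom struct {
	waiting map[string][]string // group name -> pods parked at Permit
}

// permit models the flow: a pod whose group is not yet complete is parked
// (the real plugin returns framework.Wait with a timeout); the pod that
// completes the group releases every parked sibling (Allow).
func (r *waitingRoom) permit(pod, group string, groupComplete bool) []string {
	if !groupComplete {
		r.waiting[group] = append(r.waiting[group], pod)
		return nil
	}
	released := append(r.waiting[group], pod)
	delete(r.waiting, group)
	return released
}

func main() {
	room := &waitingRoom{waiting: map[string][]string{}}
	room.permit("pod-0", "my-group", false)
	room.permit("pod-1", "my-group", false)
	fmt.Println(room.permit("pod-2", "my-group", true)) // [pod-0 pod-1 pod-2]
}
```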
- f.podGroupManager.ActivateSiblings(pod, state) + fluence.podGroupManager.ActivateSiblings(pod, state) case fcore.Success: podGroupFullName := flabel.GetPodGroupFullName(pod) - f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { + fluence.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { if flabel.GetPodGroupFullName(waitingPod.GetPod()) == podGroupFullName { - f.log.Info("Permit allows pod %s", waitingPod.GetPod().Name) - waitingPod.Allow(f.Name()) + fluence.log.Info("Permit allows pod %s", waitingPod.GetPod().Name) + waitingPod.Allow(fluence.Name()) } }) - f.log.Info("Permit allows pod %s", pod.Name) + fluence.log.Info("Permit allows pod %s", pod.Name) retStatus = framework.NewStatus(framework.Success) waitTime = 0 } @@ -340,21 +339,21 @@ func (f *Fluence) Permit( } // Reserve is the functions invoked by the framework at "reserve" extension point. -func (f *Fluence) Reserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) *framework.Status { +func (fluence *Fluence) Reserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) *framework.Status { return nil } // Unreserve rejects all other Pods in the PodGroup when one of the pods in the group times out. -func (f *Fluence) Unreserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) { - groupName, podGroup := f.podGroupManager.GetPodGroup(ctx, pod) +func (fluence *Fluence) Unreserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) { + groupName, podGroup := fluence.podGroupManager.GetPodGroup(ctx, pod) if podGroup == nil { return } - f.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { + fluence.frameworkHandler.IterateOverWaitingPods(func(waitingPod framework.WaitingPod) { if waitingPod.GetPod().Namespace == pod.Namespace && flabel.GetPodGroupLabel(waitingPod.GetPod()) == podGroup.Name { - f.log.Info("Unreserve rejects pod %s in group %s", waitingPod.GetPod().Name, groupName) - waitingPod.Reject(f.Name(), "rejection in Unreserve") + fluence.log.Info("Unreserve rejects pod %s in group %s", waitingPod.GetPod().Name, groupName) + waitingPod.Reject(fluence.Name(), "rejection in Unreserve") } }) - f.podGroupManager.DeletePermittedPodGroup(groupName) + fluence.podGroupManager.DeletePermittedPodGroup(groupName) } diff --git a/sig-scheduler-plugins/pkg/fluence/register.go b/sig-scheduler-plugins/pkg/fluence/register.go index 8f39f09..1505633 100644 --- a/sig-scheduler-plugins/pkg/fluence/register.go +++ b/sig-scheduler-plugins/pkg/fluence/register.go @@ -29,27 +29,27 @@ import ( // here goes away we cannot remove it from being known. But it's better than // not having it, and having fluxion assume more resources than the // cluster has available. 
This is a TODO as fluxion does not support it -func (f *Fluence) RegisterExisting(ctx context.Context) error { +func (fluence *Fluence) RegisterExisting(ctx context.Context) error { // creates an in-cluster config and client config, err := rest.InClusterConfig() if err != nil { - f.log.Error("[Fluence RegisterExisting] Error creating in-cluster config: %s\n", err) + fluence.log.Error("[Fluence RegisterExisting] Error creating in-cluster config: %s\n", err) return err } // creates the clientset clientset, err := kubernetes.NewForConfig(config) if err != nil { - f.log.Error("[Fluence RegisterExisting] Error creating client for config: %s\n", err) + fluence.log.Error("[Fluence RegisterExisting] Error creating client for config: %s\n", err) return err } // get pods in all the namespaces by omitting namespace // Or specify namespace to get pods in particular namespace pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{}) if err != nil { - f.log.Info("[Fluence RegisterExisting] Error listing pods: %s\n", err) + fluence.log.Info("[Fluence RegisterExisting] Error listing pods: %s\n", err) return err } - f.log.Info("[Fluence RegisterExisting] Found %d existing pods in the cluster\n", len(pods.Items)) + fluence.log.Info("[Fluence RegisterExisting] Found %d existing pods in the cluster\n", len(pods.Items)) return nil } diff --git a/sig-scheduler-plugins/pkg/fluence/utils/utils.go b/sig-scheduler-plugins/pkg/fluence/utils/utils.go index f24f6d4..da9053b 100644 --- a/sig-scheduler-plugins/pkg/fluence/utils/utils.go +++ b/sig-scheduler-plugins/pkg/fluence/utils/utils.go @@ -44,16 +44,16 @@ func getPodJobspecLabels(pod *v1.Pod) []string { // jobspec based on the group and not the individual ID. // This calculates across containers in the od func PreparePodJobSpec(pod *v1.Pod, groupName string) *pb.PodSpec { - ps := new(pb.PodSpec) - ps.Id = groupName + podSpec := new(pb.PodSpec) + podSpec.Id = groupName - // Note from vsoch - there was an if check here to see if we had labels, + // There was an if check here to see if we had labels, // I don't think there is risk to adding an empty list but we can add // the check back if there is - ps.Labels = getPodJobspecLabels(pod) + podSpec.Labels = getPodJobspecLabels(pod) // the jobname should be the group name - ps.Container = groupName + podSpec.Container = groupName // Create accumulated requests for cpu and limits // CPU and memory are summed across containers @@ -87,12 +87,12 @@ func PreparePodJobSpec(pod *v1.Pod, groupName string) *pb.PodSpec { if cpus == 0 { cpus = 1 } - ps.Cpu = cpus - ps.Gpu = gpus - ps.Memory = memory - ps.Storage = storage + podSpec.Cpu = cpus + podSpec.Gpu = gpus + podSpec.Memory = memory + podSpec.Storage = storage // I removed specRequests.Cpu().MilliValue() but we can add back some derivative if desired - klog.Infof("[Jobspec] Pod spec: CPU %v, memory %v, GPU %v, storage %v", ps.Cpu, ps.Memory, ps.Gpu, ps.Storage) - return ps + klog.Infof("[Jobspec] Pod spec: CPU %v, memory %v, GPU %v, storage %v", podSpec.Cpu, podSpec.Memory, podSpec.Gpu, podSpec.Storage) + return podSpec } diff --git a/src/Makefile b/src/Makefile index af5fcb3..e31c8ec 100644 --- a/src/Makefile +++ b/src/Makefile @@ -4,8 +4,8 @@ INSTALL_PREFIX ?= /usr LIB_PREFIX ?= /usr/lib LOCALBIN ?= $(shell pwd)/bin COMMONENVVAR=GOOS=$(shell uname -s | tr A-Z a-z) -#BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${INSTALL_PREFIX}/lib -L${FLUX_SCHED_ROOT}/resource -lresource -L${FLUX_SCHED_ROOT}/resource/libjobspec 
-ljobspec_conv -L/${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -lczmq -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp" -BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT} -I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${LIB_PREFIX} -L${LIB_PREFIX}/flux -L${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -lczmq -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp" +#BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${INSTALL_PREFIX}/lib -L${FLUX_SCHED_ROOT}/resource -lresource -L${FLUX_SCHED_ROOT}/resource/libjobspec -ljobspec_conv -L/${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp" +BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT} -I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${LIB_PREFIX} -L${LIB_PREFIX}/flux -L${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp" LOCAL_REGISTRY=localhost:5000 diff --git a/src/fluence/cmd/main.go b/src/fluence/cmd/main.go index 3fb6a06..e8ef87d 100644 --- a/src/fluence/cmd/main.go +++ b/src/fluence/cmd/main.go @@ -48,12 +48,12 @@ func main() { } responsechan = make(chan string) - s := grpc.NewServer( + server := grpc.NewServer( grpc.KeepaliveParams(keepalive.ServerParameters{ MaxConnectionIdle: 5 * time.Minute, }), ) - pb.RegisterFluxcliServiceServer(s, &flux) + pb.RegisterFluxcliServiceServer(server, &flux) // External plugin (Kubectl) GRPC // This will eventually be an external GRPC module that can @@ -64,11 +64,11 @@ func main() { if *enableServicePlugin { plugin := service.ExternalService{} plugin.Init() - svcPb.RegisterExternalPluginServiceServer(s, &plugin) + svcPb.RegisterExternalPluginServiceServer(server, &plugin) } fmt.Printf("[GRPCServer] gRPC Listening on %s\n", lis.Addr().String()) - if err := s.Serve(lis); err != nil { + if err := server.Serve(lis); err != nil { fmt.Printf("[GRPCServer] failed to serve: %v\n", err) } diff --git a/src/fluence/fluxion/fluxion.go b/src/fluence/fluxion/fluxion.go index 05e94fa..f288cdf 100644 --- a/src/fluence/fluxion/fluxion.go +++ b/src/fluence/fluxion/fluxion.go @@ -20,10 +20,10 @@ type Fluxion struct { } // InitFluxion creates a new client to interaction with the fluxion API (via go bindings) -func (f *Fluxion) InitFluxion(policy *string, label *string) { - f.cli = fluxcli.NewReapiClient() +func (fluxion *Fluxion) InitFluxion(policy *string, label *string) { + fluxion.cli = fluxcli.NewReapiClient() - klog.Infof("[Fluence] Created flux resource client %s", f.cli) + klog.Infof("[Fluence] Created flux resource client %s", fluxion.cli) err := utils.CreateJGF(defaults.KubernetesJsonGraphFormat, label) if err != nil { return @@ -40,26 +40,25 @@ func (f *Fluxion) InitFluxion(policy *string, label *string) { p = string("{\"matcher_policy\": \"" + *policy + "\"}") klog.Infof("[Fluence] match policy: %s", p) } - - f.cli.InitContext(string(jgf), p) + fluxion.cli.InitContext(string(jgf), p) } // Cancel wraps the Cancel function of the fluxion go bindings -func (s *Fluxion) Cancel(ctx context.Context, in *pb.CancelRequest) (*pb.CancelResponse, error) { +func (fluxion *Fluxion) Cancel(ctx context.Context, in *pb.CancelRequest) (*pb.CancelResponse, error) { klog.Infof("[Fluence] received cancel request %v\n", in) - err := s.cli.Cancel(int64(in.JobID), true) + err 
:= fluxion.cli.Cancel(int64(in.JobID), true) if err != nil { - return nil, errors.New("Error in Cancel") + return nil, err } // Why would we have an error code here if we check above? // This (I think) should be an error code for the specific job dr := &pb.CancelResponse{JobID: in.JobID} klog.Infof("[Fluence] sending cancel response %v\n", dr) - klog.Infof("[Fluence] cancel errors so far: %s\n", s.cli.GetErrMsg()) + klog.Infof("[Fluence] cancel errors so far: %s\n", fluxion.cli.GetErrMsg()) - reserved, at, overhead, mode, fluxerr := s.cli.Info(int64(in.JobID)) + reserved, at, overhead, mode, fluxerr := fluxion.cli.Info(int64(in.JobID)) klog.Infof("\n\t----Job Info output---") klog.Infof("jobid: %d\nreserved: %t\nat: %d\noverhead: %f\nmode: %s\nerror: %d\n", in.JobID, reserved, at, overhead, mode, fluxerr) @@ -67,48 +66,27 @@ func (s *Fluxion) Cancel(ctx context.Context, in *pb.CancelRequest) (*pb.CancelR return dr, nil } -// generateJobSpec generates a jobspec for a match request and returns the string -func (s *Fluxion) generateJobspec(in *pb.MatchRequest) ([]byte, error) { - - spec := []byte{} - - // Create a temporary file to write and read the jobspec - // The first parameter here as the empty string creates in /tmp - file, err := os.CreateTemp("", "jobspec.*.yaml") - if err != nil { - return spec, err - } - defer os.Remove(file.Name()) - jobspec.CreateJobSpecYaml(in.Ps, in.Count, file.Name()) - - spec, err = os.ReadFile(file.Name()) - if err != nil { - return spec, errors.New("Error reading jobspec") - } - return spec, err -} - // Match wraps the MatchAllocate function of the fluxion go bindings // If a match is not possible, we return the error and an empty response -func (s *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResponse, error) { +func (fluxion *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResponse, error) { emptyResponse := &pb.MatchResponse{} // Prepare an empty match response (that can still be serialized) klog.Infof("[Fluence] Received Match request %v\n", in) - // Generate the jobspec, written to temporary file and read as string - spec, err := s.generateJobspec(in) + // Generate the jobspec, array of bytes converted to string + spec, err := jobspec.CreateJobSpecYaml(in.Ps, in.Count) if err != nil { return emptyResponse, err } // Ask flux to match allocate! 
- reserved, allocated, at, overhead, jobid, fluxerr := s.cli.MatchAllocate(false, string(spec)) + reserved, allocated, at, overhead, jobid, fluxerr := fluxion.cli.MatchAllocate(false, string(spec)) utils.PrintOutput(reserved, allocated, at, overhead, jobid, fluxerr) // Be explicit about errors (or not) - errorMessages := s.cli.GetErrMsg() + errorMessages := fluxion.cli.GetErrMsg() if errorMessages == "" { klog.Infof("[Fluence] There are no errors") } else { diff --git a/src/fluence/jgf/jgf.go b/src/fluence/jgf/jgf.go index 1f45235..8a047f9 100644 --- a/src/fluence/jgf/jgf.go +++ b/src/fluence/jgf/jgf.go @@ -17,6 +17,7 @@ package jgf import ( "encoding/json" + "fmt" "log" "os" "strconv" @@ -26,13 +27,26 @@ import ( var ( // Defaults for nodes defaultExclusive = false - defaultRank = -1 - defaultSize = 1 + defaultRank = int64(-1) + defaultSize = int64(1) defaultUnit = "" // Relations - containsRelation = "contains" - inRelation = "in" + ContainsRelation = "contains" + InRelation = "in" + + // Vertex (node) types + // These are public to be used in the utils package + ClusterType = "cluster" + NodeType = "node" + CoreType = "core" + VirtualCoreType = "vcore" + RackType = "rack" + SocketType = "socket" + SubnetType = "subnet" + MemoryType = "memory" + NvidiaGPU = "nvidiagpu" + GPUType = "gpu" // Paths containmentKey = "containment" @@ -73,31 +87,20 @@ func (g *Fluxjgf) MakeEdge(source string, target string, contains string) { }, } g.Graph.Edges = append(g.Graph.Edges, newedge) - if contains == containsRelation { + if contains == ContainsRelation { tnode := g.NodeMap[target] tnode.Metadata.Paths[containmentKey] = g.NodeMap[source].Metadata.Paths[containmentKey] + "/" + tnode.Metadata.Name } } -// processLabels selects a subset based on a string filter -func processLabels(labels *map[string]string, filter string) (filtered map[string]string) { - filtered = map[string]string{} - for key, v := range *labels { - if strings.Contains(key, filter) { - filtered[key] = v - } - } - return -} - // MakeSubnet creates a subnet for the graph -func (g *Fluxjgf) MakeSubnet(index int, ip string) string { +func (g *Fluxjgf) MakeSubnet(index int64, ip string) string { newnode := node{ - Id: strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ - Type: "subnet", + Type: SubnetType, Basename: ip, - Name: ip + strconv.Itoa(g.Elements), + Name: ip + fmt.Sprintf("%d", g.Elements), Id: index, Uniq_id: g.Elements, Rank: defaultRank, @@ -114,11 +117,11 @@ func (g *Fluxjgf) MakeSubnet(index int, ip string) string { // MakeNode creates a new node for the graph func (g *Fluxjgf) MakeNode(index int, exclusive bool, subnet string) string { newnode := node{ - Id: strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ - Type: "node", + Type: NodeType, Basename: subnet, - Name: subnet + strconv.Itoa(g.Elements), + Name: subnet + fmt.Sprintf("%d", g.Elements), Id: g.Elements, Uniq_id: g.Elements, Rank: defaultRank, @@ -133,13 +136,13 @@ func (g *Fluxjgf) MakeNode(index int, exclusive bool, subnet string) string { } // MakeSocket creates a socket for the graph -func (g *Fluxjgf) MakeSocket(index int, name string) string { +func (g *Fluxjgf) MakeSocket(index int64, name string) string { newnode := node{ - Id: strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ - Type: "socket", + Type: SocketType, Basename: name, - Name: name + strconv.Itoa(index), + Name: name + fmt.Sprintf("%d", index), Id: index, Uniq_id: g.Elements, Rank: 
defaultRank, @@ -154,13 +157,13 @@ func (g *Fluxjgf) MakeSocket(index int, name string) string { } // MakeCore creates a core for the graph -func (g *Fluxjgf) MakeCore(index int, name string) string { +func (g *Fluxjgf) MakeCore(index int64, name string) string { newnode := node{ - Id: strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ - Type: "core", + Type: CoreType, Basename: name, - Name: name + strconv.Itoa(index), + Name: name + fmt.Sprintf("%d", index), Id: index, Uniq_id: g.Elements, Rank: defaultRank, @@ -175,13 +178,13 @@ func (g *Fluxjgf) MakeCore(index int, name string) string { } // MakeVCore makes a vcore (I think 2 vcpu == 1 cpu) for the graph -func (g *Fluxjgf) MakeVCore(coreid string, index int, name string) string { +func (g *Fluxjgf) MakeVCore(coreid string, index int64, name string) string { newnode := node{ - Id: strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ - Type: "vcore", + Type: VirtualCoreType, Basename: name, - Name: name + strconv.Itoa(index), + Name: name + fmt.Sprintf("%d", index), Id: index, Uniq_id: g.Elements, Rank: defaultRank, @@ -192,13 +195,13 @@ func (g *Fluxjgf) MakeVCore(coreid string, index int, name string) string { }, } g.addNode(newnode) - g.MakeEdge(coreid, newnode.Id, containsRelation) - g.MakeEdge(newnode.Id, coreid, inRelation) + g.MakeEdge(coreid, newnode.Id, ContainsRelation) + g.MakeEdge(newnode.Id, coreid, InRelation) return newnode.Id } // MakeNFProperties makes the node feature discovery properties for the graph -func (g *Fluxjgf) MakeNFDProperties(coreid string, index int, filter string, labels *map[string]string) { +func (g *Fluxjgf) MakeNFDProperties(coreid string, index int64, filter string, labels *map[string]string) { for key, _ := range *labels { if strings.Contains(key, filter) { name := strings.Split(key, "/")[1] @@ -207,11 +210,11 @@ func (g *Fluxjgf) MakeNFDProperties(coreid string, index int, filter string, lab } newnode := node{ - Id: strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ Type: name, Basename: name, - Name: name + strconv.Itoa(index), + Name: name + fmt.Sprintf("%d", index), Id: index, Uniq_id: g.Elements, Rank: defaultRank, @@ -222,22 +225,22 @@ func (g *Fluxjgf) MakeNFDProperties(coreid string, index int, filter string, lab }, } g.addNode(newnode) - g.MakeEdge(coreid, newnode.Id, containsRelation) + g.MakeEdge(coreid, newnode.Id, ContainsRelation) } } } -func (g *Fluxjgf) MakeNFDPropertiesByValue(coreid string, index int, filter string, labels *map[string]string) { +func (g *Fluxjgf) MakeNFDPropertiesByValue(coreid string, index int64, filter string, labels *map[string]string) { for key, val := range *labels { if strings.Contains(key, filter) { name := val newnode := node{ - Id: strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ Type: name, Basename: name, - Name: name + strconv.Itoa(index), + Name: name + fmt.Sprintf("%d", index), Id: index, Uniq_id: g.Elements, Rank: defaultRank, @@ -248,19 +251,19 @@ func (g *Fluxjgf) MakeNFDPropertiesByValue(coreid string, index int, filter stri }, } g.addNode(newnode) - g.MakeEdge(coreid, newnode.Id, containsRelation) + g.MakeEdge(coreid, newnode.Id, ContainsRelation) } } } // MakeMemory creates memory for the graph -func (g *Fluxjgf) MakeMemory(index int, name string, unit string, size int) string { +func (g *Fluxjgf) MakeMemory(index int64, name string, unit string, size int64) string { newnode := node{ - Id: 
strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ - Type: "memory", + Type: MemoryType, Basename: name, - Name: name + strconv.Itoa(index), + Name: name + fmt.Sprintf("%d", index), Id: index, Uniq_id: g.Elements, Rank: defaultRank, @@ -275,13 +278,13 @@ func (g *Fluxjgf) MakeMemory(index int, name string, unit string, size int) stri } // MakeGPU makes a gpu for the graph -func (g *Fluxjgf) MakeGPU(index int, name string, size int) string { +func (g *Fluxjgf) MakeGPU(index int64, name string, size int64) string { newnode := node{ - Id: strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ - Type: "gpu", + Type: GPUType, Basename: name, - Name: name + strconv.Itoa(index), + Name: name + fmt.Sprintf("%d", index), Id: index, Uniq_id: g.Elements, Rank: defaultRank, @@ -301,7 +304,7 @@ func (g *Fluxjgf) MakeCluster(clustername string) string { newnode := node{ Id: strconv.Itoa(0), Metadata: nodeMetadata{ - Type: "cluster", + Type: ClusterType, Basename: clustername, Name: clustername + "0", Id: g.Elements, @@ -320,14 +323,14 @@ func (g *Fluxjgf) MakeCluster(clustername string) string { } // MakeRack makes the rack -func (g *Fluxjgf) MakeRack(id int) string { +func (g *Fluxjgf) MakeRack(index int64) string { newnode := node{ - Id: strconv.Itoa(g.Elements), + Id: fmt.Sprintf("%d", g.Elements), Metadata: nodeMetadata{ - Type: "rack", - Basename: "rack", - Name: "rack" + strconv.Itoa(id), - Id: id, + Type: RackType, + Basename: RackType, + Name: RackType + fmt.Sprintf("%d", index), + Id: index, Uniq_id: g.Elements, Rank: defaultRank, Exclusive: defaultExclusive, diff --git a/src/fluence/jgf/types.go b/src/fluence/jgf/types.go index b2b743f..21ccd00 100644 --- a/src/fluence/jgf/types.go +++ b/src/fluence/jgf/types.go @@ -38,12 +38,12 @@ type nodeMetadata struct { Type string `json:"type"` Basename string `json:"basename"` Name string `json:"name"` - Id int `json:"id"` - Uniq_id int `json:"uniq_id"` - Rank int `json:"rank,omitempty"` + Id int64 `json:"id"` + Uniq_id int64 `json:"uniq_id"` + Rank int64 `json:"rank,omitempty"` Exclusive bool `json:"exclusive"` Unit string `json:"unit"` - Size int `json:"size"` + Size int64 `json:"size"` Paths map[string]string `json:"paths,omitempty"` Properties map[string]string `json:"properties,omitempty"` } @@ -57,6 +57,6 @@ type graph struct { type Fluxjgf struct { Graph graph `json:"graph"` - Elements int `json:"-"` + Elements int64 `json:"-"` NodeMap map[string]node `json:"-"` } diff --git a/src/fluence/jobspec/jobspec.go b/src/fluence/jobspec/jobspec.go index 683f586..96ed0fe 100644 --- a/src/fluence/jobspec/jobspec.go +++ b/src/fluence/jobspec/jobspec.go @@ -18,8 +18,6 @@ package jobspec import ( "fmt" "log" - "math" - "os" pb "github.com/flux-framework/flux-k8s/flux-plugin/fluence/fluxcli-grpc" "gopkg.in/yaml.v2" @@ -39,7 +37,7 @@ Ps: &pb.PodSpec{ */ // CreateJobSpecYaml writes the protobuf jobspec into a yaml file -func CreateJobSpecYaml(spec *pb.PodSpec, count int32, filename string) error { +func CreateJobSpecYaml(spec *pb.PodSpec, count int32) ([]byte, error) { command := []string{spec.Container} fmt.Println("Labels ", spec.Labels, " ", len(spec.Labels)) @@ -68,38 +66,9 @@ func CreateJobSpecYaml(spec *pb.PodSpec, count int32, filename string) error { yamlbytes, err := yaml.Marshal(&js) if err != nil { log.Fatalf("[JobSpec] yaml.Marshal failed with '%s'\n", err) - return err + return yamlbytes, err } - return writeBytes(yamlbytes, filename) -} - -// WriteBytes writes a byte string to file 
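With writeBytes removed, CreateJobSpecYaml now hands the YAML back as bytes and the caller (Match, above) passes it straight to fluxion. A minimal sketch of the new call shape; the jobspec import path is inferred from the pb import in this file and the PodSpec values are placeholders:

```go
package main

import (
	"fmt"

	pb "github.com/flux-framework/flux-k8s/flux-plugin/fluence/fluxcli-grpc"
	"github.com/flux-framework/flux-k8s/flux-plugin/fluence/jobspec"
)

func main() {
	// A throwaway PodSpec shaped like the one PreparePodJobSpec builds.
	spec := &pb.PodSpec{
		Id:        "my-group",
		Container: "my-group",
		Cpu:       4,
		Memory:    16 << 30,
	}

	// The jobspec comes back as bytes instead of being written to a temporary
	// file, so it can be handed directly to the fluxion match call.
	yamlBytes, err := jobspec.CreateJobSpecYaml(spec, 2)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(yamlBytes))
}
```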
-func writeBytes(bytelist []byte, filename string) error { - fmt.Printf("[JobSpec] Preparing to write:\n%s\n", string(bytelist)) - f, err := os.Create(filename) - if err != nil { - log.Fatalf("[JobSpec] Couldn't create file!!\n") - return err - } - defer f.Close() - - _, err = f.Write(bytelist) - if err != nil { - log.Fatalf("[JobSpec] Couldn't write file!!\n") - return err - } - - // Not sure why this is here, but will keep for now - _, err = f.WriteString("\n") - if err != nil { - log.Fatalf("[JobSpec] Couldn't append newline to file!!\n") - } - return err -} - -func toGB(bytes int64) int64 { - res := float64(bytes) / math.Pow(10, 9) - return int64(res) + return yamlbytes, nil } // createSocketResources creates the socket resources for the JobSpec diff --git a/src/fluence/utils/utils.go b/src/fluence/utils/utils.go index e429056..490a0e0 100644 --- a/src/fluence/utils/utils.go +++ b/src/fluence/utils/utils.go @@ -93,23 +93,18 @@ func CreateJGF(filename string, skipLabel *string) error { // Create a Flux Json Graph Format (JGF) with all cluster nodes fluxgraph := jgf.InitJGF() - // TODO it looks like we can add more to the graph here - - // let's remember to consider what else we can. - // subnets := make(map[string]string) - + // Top level of the graph is the cluster + // This assumes fluxion is only serving one cluster. + // previous comments indicate that we choose between the level + // of a rack and a subnet. A rack doesn't make sense (the nodes could + // be on multiple racks) so subnet is likely the right abstraction cluster := fluxgraph.MakeCluster("k8scluster") - // Rack needs to be disabled when using subnets - // rack := fluxgraph.MakeRack(0) - - // fluxgraph.MakeEdge(cluster, rack, "contains") - // fluxgraph.MakeEdge(rack, cluster, "in") - vcores := 0 fmt.Println("Number nodes ", len(nodes.Items)) var totalAllocCpu int64 totalAllocCpu = 0 - sdnCount := 0 + sdnCount := int64(0) for nodeIndex, node := range nodes.Items { @@ -146,13 +141,12 @@ func CreateJGF(filename string, skipLabel *string) error { return err } - // Check if subnet already exists - // Here we build subnets according to topology.kubernetes.io/zone label + // Here we build the subnet according to topology.kubernetes.io/zone label subnetName := node.Labels["topology.kubernetes.io/zone"] subnet := fluxgraph.MakeSubnet(sdnCount, subnetName) sdnCount = sdnCount + 1 - fluxgraph.MakeEdge(cluster, subnet, "contains") - fluxgraph.MakeEdge(subnet, cluster, "in") + fluxgraph.MakeEdge(cluster, subnet, jgf.ContainsRelation) + fluxgraph.MakeEdge(subnet, cluster, jgf.InRelation) // These are requests for existing pods, for cpu and memory reqs := computeTotalRequests(pods) @@ -179,64 +173,44 @@ func CreateJGF(filename string, skipLabel *string) error { fmt.Printf(" available mem: %d\n", availMem) gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable["nvidia.com/gpu"] - // reslist := node.Status.Allocatable - // resources := make([]corev1.ResourceName, 0, len(reslist)) - // for resource := range reslist { - // fmt.Println("resource ", resource) - // resources = append(resources, resource) - // } - // for _, resource := range resources { - // value := reslist[resource] - - // fmt.Printf(" %s:\t%s\n", resource, value.String()) - // } + // TODO possibly look at pod resources vs. 
node.Status.Allocatable workernode := fluxgraph.MakeNode(nodeIndex, false, node.Name) - fluxgraph.MakeEdge(subnet, workernode, "contains") // this is rack otherwise - fluxgraph.MakeEdge(workernode, subnet, "in") // this is rack otherwise - - // socket := fluxgraph.MakeSocket(0, "socket") - // fluxgraph.MakeEdge(workernode, socket, "contains") - // fluxgraph.MakeEdge(socket, workernode, "in") + fluxgraph.MakeEdge(subnet, workernode, jgf.ContainsRelation) + fluxgraph.MakeEdge(workernode, subnet, jgf.InRelation) if hasGpuAllocatable { fmt.Println("GPU Resource quantity ", gpuAllocatable.Value()) - //MakeGPU(index int, name string, size int) string { for index := 0; index < int(gpuAllocatable.Value()); index++ { - gpu := fluxgraph.MakeGPU(index, "nvidiagpu", 1) - fluxgraph.MakeEdge(workernode, gpu, "contains") // workernode was socket - fluxgraph.MakeEdge(gpu, workernode, "in") + gpu := fluxgraph.MakeGPU(int64(index), jgf.NvidiaGPU, 1) + fluxgraph.MakeEdge(workernode, gpu, jgf.ContainsRelation) + fluxgraph.MakeEdge(gpu, workernode, jgf.InRelation) } } for index := 0; index < int(availCpu); index++ { - // MakeCore(index int, name string) - core := fluxgraph.MakeCore(index, "core") - fluxgraph.MakeEdge(workernode, core, "contains") // workernode was socket - fluxgraph.MakeEdge(core, workernode, "in") + core := fluxgraph.MakeCore(int64(index), jgf.CoreType) + fluxgraph.MakeEdge(workernode, core, jgf.ContainsRelation) + fluxgraph.MakeEdge(core, workernode, jgf.InRelation) // Question from Vanessa: // How can we get here and have vcores ever not equal to zero? if vcores == 0 { - fluxgraph.MakeNFDProperties(core, index, "cpu-", &node.Labels) - // fluxgraph.MakeNFDProperties(core, index, "netmark-", &node.Labels) + fluxgraph.MakeNFDProperties(core, int64(index), "cpu-", &node.Labels) } else { - for vc := 0; vc < vcores; vc++ { - vcore := fluxgraph.MakeVCore(core, vc, "vcore") - fluxgraph.MakeNFDProperties(vcore, index, "cpu-", &node.Labels) + for virtualCore := 0; virtualCore < vcores; virtualCore++ { + vcore := fluxgraph.MakeVCore(core, int64(virtualCore), jgf.VirtualCoreType) + fluxgraph.MakeNFDProperties(vcore, int64(index), "cpu-", &node.Labels) } } } - // MakeMemory(index int, name string, unit string, size int) fractionMem := availMem >> 30 - // fractionmem := (totalmem/totalcpu) >> 20 - // fmt.Println("Creating ", fractionmem, " vertices with ", 1<<10, " MB of mem") - for i := 0; i < /*int(totalcpu)*/ int(fractionMem); i++ { - mem := fluxgraph.MakeMemory(i, "memory", "MB", int(1<<10)) - fluxgraph.MakeEdge(workernode, mem, "contains") - fluxgraph.MakeEdge(mem, workernode, "in") + for i := 0; i < int(fractionMem); i++ { + mem := fluxgraph.MakeMemory(int64(i), jgf.MemoryType, "MB", 1<<10) + fluxgraph.MakeEdge(workernode, mem, jgf.ContainsRelation) + fluxgraph.MakeEdge(mem, workernode, jgf.InRelation) } } fmt.Printf("\nCan request at most %d exclusive cpu", totalAllocCpu) @@ -248,6 +222,7 @@ func CreateJGF(filename string, skipLabel *string) error { } +// computeTotalRequests sums up the pod requests for the list. We do not consider limits. 
func computeTotalRequests(podList *corev1.PodList) (total map[corev1.ResourceName]resource.Quantity) { total = map[corev1.ResourceName]resource.Quantity{} for _, pod := range podList.Items { @@ -260,14 +235,6 @@ func computeTotalRequests(podList *corev1.PodList) (total map[corev1.ResourceNam total[podReqName] = v } } - // for podLimitName, podLimitValue := range podLimits { - // if v, ok := total[podLimitName]; !ok { - // total[podLimitName] = podLimitValue - // } else { - // v.Add(podLimitValue) - // total[podLimitName] = v - // } - // } } return } @@ -295,17 +262,17 @@ func ParseAllocResult(allocated, podName string) []allocation { // Parse graph and nodes into interfaces // TODO look at github.com/mitchellh/mapstructure // that might make this easier - nodes := dat["graph"].(interface{}) + nodes := dat["graph"] str1 := nodes.(map[string]interface{}) str2 := str1["nodes"].([]interface{}) for _, item := range str2 { str1 = item.(map[string]interface{}) metadata := str1["metadata"].(map[string]interface{}) - if metadata["type"].(string) == "core" { + if metadata["type"].(string) == jgf.CoreType { corecount = corecount + 1 } - if metadata["type"].(string) == "node" { + if metadata["type"].(string) == jgf.NodeType { result = append(result, allocation{ Type: metadata["type"].(string), Name: metadata["name"].(string), @@ -334,6 +301,6 @@ func PrintOutput(reserved bool, allocated string, at int64, overhead float64, jo // Only print error if we had one if fluxerr != nil { - fmt.Printf("error: %w\n", fluxerr) + fmt.Printf("error: %s\n", fluxerr) } }
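Pulling the CreateJGF changes together: the graph is now built from the named jgf vertex types and relation constants instead of string literals. A condensed sketch of the containment chain it produces (cluster, subnet, node, core, memory), one vertex of each, with the paired contains/in edges; the jgf import path is inferred and the zone and node names are placeholders:

```go
package main

import (
	"github.com/flux-framework/flux-k8s/flux-plugin/fluence/jgf"
)

func main() {
	// Build the same containment chain CreateJGF builds, one vertex wide:
	// cluster -> subnet -> node -> {core, memory}, with paired contains/in edges.
	fluxgraph := jgf.InitJGF()

	cluster := fluxgraph.MakeCluster("k8scluster")

	subnet := fluxgraph.MakeSubnet(0, "zone-a")
	fluxgraph.MakeEdge(cluster, subnet, jgf.ContainsRelation)
	fluxgraph.MakeEdge(subnet, cluster, jgf.InRelation)

	workernode := fluxgraph.MakeNode(0, false, "node-0")
	fluxgraph.MakeEdge(subnet, workernode, jgf.ContainsRelation)
	fluxgraph.MakeEdge(workernode, subnet, jgf.InRelation)

	core := fluxgraph.MakeCore(0, jgf.CoreType)
	fluxgraph.MakeEdge(workernode, core, jgf.ContainsRelation)
	fluxgraph.MakeEdge(core, workernode, jgf.InRelation)

	mem := fluxgraph.MakeMemory(0, jgf.MemoryType, "MB", 1<<10)
	fluxgraph.MakeEdge(workernode, mem, jgf.ContainsRelation)
	fluxgraph.MakeEdge(mem, workernode, jgf.InRelation)
}
```

The same pattern repeats per node in CreateJGF, with MakeGPU added when the node advertises nvidia.com/gpu capacity.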