|
15 | 15 | package argocompiler
|
16 | 16 |
|
17 | 17 | import (
|
| 18 | + "crypto/sha256" |
| 19 | + "encoding/hex" |
| 20 | + "encoding/json" |
18 | 21 | "fmt"
|
19 | 22 | "strings"
|
20 | 23 |
|
21 | 24 | wfapi "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
|
22 | 25 | "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec"
|
23 | 26 | "github.com/kubeflow/pipelines/backend/src/v2/compiler"
|
| 27 | + log "github.com/sirupsen/logrus" |
24 | 28 | "google.golang.org/protobuf/proto"
|
25 | 29 | "google.golang.org/protobuf/types/known/structpb"
|
26 | 30 | k8score "k8s.io/api/core/v1"
|
@@ -63,7 +67,7 @@ func Compile(jobArg *pipelinespec.PipelineJob, kubernetesSpecArg *pipelinespec.S
|
63 | 67 | if err != nil {
|
64 | 68 | return nil, err
|
65 | 69 | }
|
66 |
| - // fill root component default paramters to PipelineJob |
| 70 | + // fill root component default parameters to PipelineJob |
67 | 71 | specParams := spec.GetRoot().GetInputDefinitions().GetParameters()
|
68 | 72 | for name, param := range specParams {
|
69 | 73 | _, ok := job.RuntimeConfig.ParameterValues[name]
|
@@ -108,6 +112,9 @@ func Compile(jobArg *pipelinespec.PipelineJob, kubernetesSpecArg *pipelinespec.S
|
108 | 112 | "pipelines.kubeflow.org/v2_component": "true",
|
109 | 113 | },
|
110 | 114 | },
|
| 115 | + Arguments: wfapi.Arguments{ |
| 116 | + Parameters: []wfapi.Parameter{}, |
| 117 | + }, |
111 | 118 | ServiceAccountName: "pipeline-runner",
|
112 | 119 | Entrypoint: tmplEntrypoint,
|
113 | 120 | },
|
@@ -180,69 +187,134 @@ func (c *workflowCompiler) templateName(componentName string) string {
|
180 | 187 | return componentName
|
181 | 188 | }
|
182 | 189 |
|
183 |
| -// WIP: store component spec, task spec and executor spec in annotations |
184 |
| - |
185 | 190 | const (
|
186 |
| - annotationComponents = "pipelines.kubeflow.org/components-" |
187 |
| - annotationContainers = "pipelines.kubeflow.org/implementations-" |
188 |
| - annotationKubernetesSpec = "pipelines.kubeflow.org/kubernetes-" |
| 191 | + argumentsComponents = "components-" |
| 192 | + argumentsContainers = "implementations-" |
| 193 | + argumentsKubernetesSpec = "kubernetes-" |
189 | 194 | )
|
190 | 195 |
|
191 | 196 | func (c *workflowCompiler) saveComponentSpec(name string, spec *pipelinespec.ComponentSpec) error {
|
192 |
| - return c.saveProtoToAnnotation(annotationComponents+name, spec) |
| 197 | + hashedComponent := c.hashComponentContainer(name) |
| 198 | + |
| 199 | + return c.saveProtoToArguments(argumentsComponents+hashedComponent, spec) |
193 | 200 | }
|
194 | 201 |
|
195 | 202 | // useComponentSpec returns a placeholder we can refer to the component spec
|
196 | 203 | // in argo workflow fields.
|
197 | 204 | func (c *workflowCompiler) useComponentSpec(name string) (string, error) {
|
198 |
| - return c.annotationPlaceholder(annotationComponents + name) |
| 205 | + hashedComponent := c.hashComponentContainer(name) |
| 206 | + |
| 207 | + return c.argumentsPlaceholder(argumentsComponents + hashedComponent) |
199 | 208 | }
|
200 | 209 |
|
201 | 210 | func (c *workflowCompiler) saveComponentImpl(name string, msg proto.Message) error {
|
202 |
| - return c.saveProtoToAnnotation(annotationContainers+name, msg) |
| 211 | + hashedComponent := c.hashComponentContainer(name) |
| 212 | + |
| 213 | + return c.saveProtoToArguments(argumentsContainers+hashedComponent, msg) |
203 | 214 | }
|
204 | 215 |
|
205 | 216 | func (c *workflowCompiler) useComponentImpl(name string) (string, error) {
|
206 |
| - return c.annotationPlaceholder(annotationContainers + name) |
| 217 | + hashedComponent := c.hashComponentContainer(name) |
| 218 | + |
| 219 | + return c.argumentsPlaceholder(argumentsContainers + hashedComponent) |
207 | 220 | }
|
208 | 221 |
|
209 | 222 | func (c *workflowCompiler) saveKubernetesSpec(name string, spec *structpb.Struct) error {
|
210 |
| - return c.saveProtoToAnnotation(annotationKubernetesSpec+name, spec) |
| 223 | + return c.saveProtoToArguments(argumentsKubernetesSpec+name, spec) |
211 | 224 | }
|
212 | 225 |
|
213 | 226 | func (c *workflowCompiler) useKubernetesImpl(name string) (string, error) {
|
214 |
| - return c.annotationPlaceholder(annotationKubernetesSpec + name) |
| 227 | + return c.argumentsPlaceholder(argumentsKubernetesSpec + name) |
215 | 228 | }
|
216 | 229 |
|
217 |
| -// TODO(Bobgy): sanitize component name |
218 |
| -func (c *workflowCompiler) saveProtoToAnnotation(name string, msg proto.Message) error { |
| 230 | +// saveProtoToArguments saves a proto message to the workflow arguments. The |
| 231 | +// message is serialized to JSON and stored in the workflow arguments and then |
| 232 | +// referenced by the workflow templates using AWF templating syntax. The reason |
| 233 | +// for storing it in the workflow arguments is because there is a 1-many |
| 234 | +// relationship between components and tasks that reference them. The workflow |
| 235 | +// arguments allow us to deduplicate the component logic (implementation & spec |
| 236 | +// in IR), significantly reducing the size of the argo workflow manifest. |
| 237 | +func (c *workflowCompiler) saveProtoToArguments(componentName string, msg proto.Message) error { |
219 | 238 | if c == nil {
|
220 | 239 | return fmt.Errorf("compiler is nil")
|
221 | 240 | }
|
222 |
| - if c.wf.Annotations == nil { |
223 |
| - c.wf.Annotations = make(map[string]string) |
| 241 | + if c.wf.Spec.Arguments.Parameters == nil { |
| 242 | + c.wf.Spec.Arguments = wfapi.Arguments{Parameters: []wfapi.Parameter{}} |
224 | 243 | }
|
225 |
| - if _, alreadyExists := c.wf.Annotations[name]; alreadyExists { |
226 |
| - return fmt.Errorf("annotation %q already exists", name) |
| 244 | + if c.wf.Spec.Arguments.GetParameterByName(componentName) != nil { |
| 245 | + return nil |
227 | 246 | }
|
228 | 247 | json, err := stablyMarshalJSON(msg)
|
229 | 248 | if err != nil {
|
230 |
| - return fmt.Errorf("saving component spec of %q to annotations: %w", name, err) |
| 249 | + return fmt.Errorf("saving component spec of %q to arguments: %w", componentName, err) |
231 | 250 | }
|
232 |
| - // TODO(Bobgy): verify name adheres to Kubernetes annotation restrictions: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/#syntax-and-character-set |
233 |
| - c.wf.Annotations[name] = json |
| 251 | + c.wf.Spec.Arguments.Parameters = append(c.wf.Spec.Arguments.Parameters, wfapi.Parameter{ |
| 252 | + Name: componentName, |
| 253 | + Value: wfapi.AnyStringPtr(json), |
| 254 | + }) |
234 | 255 | return nil
|
235 | 256 | }
|
236 | 257 |
|
237 |
| -func (c *workflowCompiler) annotationPlaceholder(name string) (string, error) { |
| 258 | +// argumentsPlaceholder checks for the unique component name within the workflow |
| 259 | +// arguments and returns a template tag that references the component in the |
| 260 | +// workflow arguments. |
| 261 | +func (c *workflowCompiler) argumentsPlaceholder(componentName string) (string, error) { |
238 | 262 | if c == nil {
|
239 | 263 | return "", fmt.Errorf("compiler is nil")
|
240 | 264 | }
|
241 |
| - if _, exists := c.wf.Annotations[name]; !exists { |
242 |
| - return "", fmt.Errorf("using component spec: failed to find annotation %q", name) |
| 265 | + if c.wf.Spec.Arguments.GetParameterByName(componentName) == nil { |
| 266 | + return "", fmt.Errorf("using component spec: failed to find workflow parameter %q", componentName) |
| 267 | + } |
| 268 | + |
| 269 | + return workflowParameter(componentName), nil |
| 270 | +} |
| 271 | + |
| 272 | +// hashComponentContainer serializes and hashes the container field of a given |
| 273 | +// component. |
| 274 | +func (c *workflowCompiler) hashComponentContainer(componentName string) string { |
| 275 | + log.Debug("componentName: ", componentName) |
| 276 | + // Return early for root component since it has no command and args. |
| 277 | + if componentName == "root" { |
| 278 | + return componentName |
243 | 279 | }
|
244 |
| - // Reference: https://argoproj.github.io/argo-workflows/variables/ |
245 |
| - return fmt.Sprintf("{{workflow.annotations.%s}}", name), nil |
| 280 | + if c.executors != nil { // Don't bother if there are no executors in the pipeline spec. |
| 281 | + // Look up the executorLabel for the component in question. |
| 282 | + executorLabel := c.spec.Components[componentName].GetExecutorLabel() |
| 283 | + log.Debug("executorLabel: ", executorLabel) |
| 284 | + // Iterate through the list of executors. |
| 285 | + for executorName, executorValue := range c.executors { |
| 286 | + log.Debug("executorName: ", executorName) |
| 287 | + // If one of them matches the executorLabel we extracted earlier... |
| 288 | + if executorName == executorLabel { |
| 289 | + // Get the corresponding container. |
| 290 | + container := executorValue.GetContainer() |
| 291 | + if container != nil { |
| 292 | + containerHash, err := hashValue(container) |
| 293 | + if err != nil { |
| 294 | + // Do not bubble up since this is not a breaking error |
| 295 | + // and we can just return the componentName in full. |
| 296 | + log.Debug("Error hashing container: ", err) |
| 297 | + } |
| 298 | + |
| 299 | + return containerHash |
| 300 | + } |
| 301 | + } |
| 302 | + } |
| 303 | + } |
| 304 | + |
| 305 | + return componentName |
| 306 | +} |
| 307 | + |
| 308 | +// hashValue serializes and hashes a provided value. |
| 309 | +func hashValue(value interface{}) (string, error) { |
| 310 | + bytes, err := json.Marshal(value) |
| 311 | + if err != nil { |
| 312 | + return "", err |
| 313 | + } |
| 314 | + h := sha256.New() |
| 315 | + h.Write([]byte(bytes)) |
| 316 | + |
| 317 | + return hex.EncodeToString(h.Sum(nil)), nil |
246 | 318 | }
|
247 | 319 |
|
248 | 320 | const (
|
|
0 commit comments