Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add hardware parameters, secret parameters, and taxonomy repo authentication #272

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ To collaborate on this repository, please follow these steps:
source .venv/bin/activate
```

## Adding/Updating dependencies
## Adding/Updating dependencies

When updating python package dependencies in `pyproject.toml`, regenerate [requirements.txt](requirements.txt):

Expand All @@ -225,7 +225,7 @@ For this you need [pybuild-deps](https://pybuild-deps.readthedocs.io/en/latest/u
Temporarily remove `kfp-pipeline-spec` from `requirements.txt`, and run:

```bash
pybuild-deps compile requirements.txt -o requirements-build.txt
pybuild-deps compile requirements.txt -o requirements-build.txt
```

> Note that we do this because `kfp-pipeline-spec` only includes wheels and not the sources; this breaks
Expand Down
52 changes: 46 additions & 6 deletions eval/final.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,21 @@ def run_final_eval_op(
sdg_path: str = "/input/sdg",
mmlu_branch_output_path: str = "/output/mmlu_branch",
mt_bench_branch_output_path: str = "/output/mt_bench_branch",
judge_secret_name: str = None,
):
import base64
import json
import os
import subprocess
from pathlib import Path

import httpx
import requests
import torch
from instructlab.eval.mmlu import MMLUBranchEvaluator
from instructlab.eval.mt_bench import MTBenchBranchEvaluator
from instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score

judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
use_tls = os.path.exists(judge_ca_cert_path) and (
os.path.getsize(judge_ca_cert_path) > 0
Expand Down Expand Up @@ -341,9 +341,49 @@ def find_node_dataset_directories(base_dir: str):

print("Starting MT_BENCH_BRANCH ...")

judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
def fetch_secret(secret_name, keys):
    """Fetch selected data keys from a Kubernetes Secret via the in-cluster API.

    Reads the pod's namespace and service-account token from their
    well-known mount paths, GETs the named Secret from the API server,
    and base64-decodes the requested data keys.

    Args:
        secret_name: Name of the Secret in the pod's own namespace.
        keys: Iterable of data keys to extract, in order.

    Returns:
        list[str]: Decoded values, one per entry in ``keys``.

    Raises:
        RuntimeError: If the service-account files are missing, the API
            call fails, or a requested key is absent from the Secret.
    """
    # Kubernetes API server inside the cluster
    K8S_API_SERVER = "https://kubernetes.default.svc"
    NAMESPACE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
    TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"

    # Fetch namespace
    try:
        with open(NAMESPACE_PATH, "r") as f:
            namespace = f.read().strip()
    except FileNotFoundError as err:
        # Chain the cause so the original failure isn't lost.
        raise RuntimeError("Error reading namespace") from err

    # Fetch service account token
    try:
        with open(TOKEN_PATH, "r") as f:
            token = f.read().strip()
    except FileNotFoundError as err:
        raise RuntimeError("Error reading service account token") from err

    headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
    # Verify TLS against the cluster CA certificate mounted into every pod.
    verify_tls = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
    url = f"{K8S_API_SERVER}/api/v1/namespaces/{namespace}/secrets/{secret_name}"
    response = requests.get(url, headers=headers, verify=verify_tls)

    if response.status_code != 200:
        raise RuntimeError(
            f"Error fetching secret: {response.status_code} {response.text}"
        )

    secret_data = response.json().get("data", {})
    # Fail with a clear message instead of a bare KeyError when the Secret
    # exists but lacks one of the expected keys.
    missing = [key for key in keys if key not in secret_data]
    if missing:
        raise RuntimeError(
            f"Secret '{secret_name}' is missing expected keys: {missing}"
        )
    return [base64.b64decode(secret_data[key]).decode() for key in keys]

if judge_secret_name is None:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

side note on this, we keep the previous approach for 2 reasons:

  1. we don't want to break standalone.py, and this code is leveraged there, so we need to maintain backwards compatibility
  2. we will need this again when we use the sdk to mount the secrets, and we'll get rid of the new additional calls to fetch_secret

judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
else:
print("Eval Judge secret specified, fetching...")
judge_api_key, judge_model_name, judge_endpoint = fetch_secret(
judge_secret_name, ["api_token", "model_name", "endpoint"]
)
print("Eval Judge secret data retrieved.")

output_dir = "/tmp/eval_output"

Expand Down
50 changes: 47 additions & 3 deletions eval/mt_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,62 @@ def run_mt_bench_op(
max_workers: str,
models_folder: str,
output_path: str = "/output/mt_bench_data.json",
judge_secret_name: str = None,
) -> NamedTuple("outputs", best_model=str, best_score=float):
import base64
import json
import os
import subprocess

import httpx
import requests
import torch
from instructlab.eval.mt_bench import MTBenchEvaluator

judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
def fetch_secret(secret_name, keys):
    """Fetch selected data keys from a Kubernetes Secret via the in-cluster API.

    Reads the pod's namespace and service-account token from their
    well-known mount paths, GETs the named Secret from the API server,
    and base64-decodes the requested data keys.

    Args:
        secret_name: Name of the Secret in the pod's own namespace.
        keys: Iterable of data keys to extract, in order.

    Returns:
        list[str]: Decoded values, one per entry in ``keys``.

    Raises:
        RuntimeError: If the service-account files are missing, the API
            call fails, or a requested key is absent from the Secret.
    """
    # Kubernetes API server inside the cluster
    K8S_API_SERVER = "https://kubernetes.default.svc"
    NAMESPACE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
    TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"

    # Fetch namespace
    try:
        with open(NAMESPACE_PATH, "r") as f:
            namespace = f.read().strip()
    except FileNotFoundError as err:
        # Chain the cause so the original failure isn't lost.
        raise RuntimeError("Error reading namespace") from err

    # Fetch service account token
    try:
        with open(TOKEN_PATH, "r") as f:
            token = f.read().strip()
    except FileNotFoundError as err:
        raise RuntimeError("Error reading service account token") from err

    headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
    # Verify TLS against the cluster CA certificate mounted into every pod.
    verify_tls = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
    url = f"{K8S_API_SERVER}/api/v1/namespaces/{namespace}/secrets/{secret_name}"
    response = requests.get(url, headers=headers, verify=verify_tls)

    if response.status_code != 200:
        raise RuntimeError(
            f"Error fetching secret: {response.status_code} {response.text}"
        )

    secret_data = response.json().get("data", {})
    # Fail with a clear message instead of a bare KeyError when the Secret
    # exists but lacks one of the expected keys.
    missing = [key for key in keys if key not in secret_data]
    if missing:
        raise RuntimeError(
            f"Secret '{secret_name}' is missing expected keys: {missing}"
        )
    return [base64.b64decode(secret_data[key]).decode() for key in keys]

if judge_secret_name is None:
judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
judge_endpoint = os.getenv("JUDGE_ENDPOINT")
else:
print("Eval Judge secret specified, fetching...")
judge_api_key, judge_model_name, judge_endpoint = fetch_secret(
judge_secret_name, ["api_token", "model_name", "endpoint"]
)
print("Eval Judge secret data retrieved.")

judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
use_tls = os.path.exists(judge_ca_cert_path) and (
os.path.getsize(judge_ca_cert_path) > 0
Expand Down
29 changes: 8 additions & 21 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def ilab_pipeline(
eval_judge_secret: str = "judge-secret",
# Other options
k8s_storage_class_name: str = "standard", # FIXME: https://github.com/kubeflow/pipelines/issues/11396, https://issues.redhat.com/browse/RHOAIRFE-470
k8s_storage_size: str = "100Gi",
):
"""InstructLab pipeline

Expand Down Expand Up @@ -179,13 +180,14 @@ def ilab_pipeline(
eval_judge_secret: General evaluation parameter: The name of the k8s secret key holding access credentials to the judge server.

k8s_storage_class_name: A Kubernetes StorageClass name for persistent volumes. Selected StorageClass must support RWX PersistentVolumes.
k8s_storage_size: The storage size of the persistent volume used for data passing within the pipeline.
"""

# SDG stage
sdg_input_pvc_task = CreatePVC(
pvc_name_suffix="-sdg",
access_modes=["ReadWriteMany"],
size="10Gi",
size=k8s_storage_size,
storage_class_name=k8s_storage_class_name,
)
git_clone_task = git_clone_op(
Expand Down Expand Up @@ -213,13 +215,10 @@ def ilab_pipeline(
repo_branch=sdg_repo_branch,
repo_pr=sdg_repo_pr,
sdg_sampling_size=sdg_sample_size,
sdg_secret_name=sdg_teacher_secret,
)
sdg_task.set_env_variable("HOME", "/tmp")
sdg_task.set_env_variable("HF_HOME", "/tmp")
use_config_map_as_env(
sdg_task, TEACHER_CONFIG_MAP, dict(endpoint="endpoint", model="model")
)
use_secret_as_env(sdg_task, TEACHER_SECRET, {"api_key": "api_key"})
use_config_map_as_volume(sdg_task, TEACHER_CONFIG_MAP, mount_path=SDG_CA_CERT_PATH)
sdg_task.set_env_variable(
SDG_CA_CERT_ENV_VAR_NAME, os.path.join(SDG_CA_CERT_PATH, SDG_CA_CERT_CM_KEY)
Expand Down Expand Up @@ -260,7 +259,7 @@ def ilab_pipeline(
model_pvc_task = CreatePVC(
pvc_name_suffix="-model-cache",
access_modes=["ReadWriteMany"],
size="100Gi",
size=k8s_storage_size,
storage_class_name=k8s_storage_class_name,
)

Expand Down Expand Up @@ -309,7 +308,7 @@ def ilab_pipeline(
output_pvc_task = CreatePVC(
pvc_name_suffix="-output",
access_modes=["ReadWriteMany"],
size="100Gi",
size=k8s_storage_size,
storage_class_name=k8s_storage_class_name,
)

Expand Down Expand Up @@ -380,6 +379,7 @@ def ilab_pipeline(
models_folder="/output/phase_2/model/hf_format",
max_workers=mt_bench_max_workers,
merge_system_user_message=mt_bench_merge_system_user_message,
judge_secret_name=eval_judge_secret,
)
mount_pvc(
task=run_mt_bench_task,
Expand All @@ -392,12 +392,6 @@ def ilab_pipeline(
run_mt_bench_task.set_accelerator_limit(1)
run_mt_bench_task.set_caching_options(False)
run_mt_bench_task.after(training_phase_2)
use_config_map_as_env(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this have been removed to maintain backwards compatibility?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I'm following, backwards compatibility for what?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code under if judge_secret_name is None: relies on the environment variables JUDGE_ENDPOINT and JUDGE_NAME, does it not?

Copy link
Contributor Author

@HumairAK HumairAK Mar 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh I see what you mean, the backwards compatibility comment I left in the PR description refers to only standalone.py, which will utilize the same component code for sdg/mt_eval/final_eval but will mount the configmaps/secrets as env vars (it's a bit convoluted), for example for sdg this is done here, we want to maintain compatibility with standalone.py

from the pipeline's perspective, you provide a secret name, and we will use it, how we use it is an implementation detail

run_mt_bench_task,
JUDGE_CONFIG_MAP,
dict(endpoint="JUDGE_ENDPOINT", model="JUDGE_NAME"),
)
use_secret_as_env(run_mt_bench_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"})

use_config_map_as_volume(
run_mt_bench_task, JUDGE_CONFIG_MAP, mount_path=JUDGE_CA_CERT_PATH
Expand All @@ -420,6 +414,7 @@ def ilab_pipeline(
merge_system_user_message=final_eval_merge_system_user_message,
few_shots=final_eval_few_shots,
batch_size=final_eval_batch_size,
judge_secret_name=eval_judge_secret,
)
mount_pvc(
task=final_eval_task, pvc_name=output_pvc_task.output, mount_path="/output"
Expand All @@ -435,20 +430,12 @@ def ilab_pipeline(
mount_path="/model",
)

use_config_map_as_env(
final_eval_task,
JUDGE_CONFIG_MAP,
dict(endpoint="JUDGE_ENDPOINT", model="JUDGE_NAME"),
)

final_eval_task.set_env_variable("HOME", "/tmp")
final_eval_task.set_env_variable("HF_HOME", "/tmp")

# uncomment if updating image with same tag
# set_image_pull_policy(final_eval_task, "Always")

use_secret_as_env(final_eval_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"})

use_config_map_as_volume(
final_eval_task, JUDGE_CONFIG_MAP, mount_path=JUDGE_CA_CERT_PATH
)
Expand Down
Loading