parameterize secret names
this commit adds logic to parameterize secret names for:

* judge server for both eval phases
* teacher server for sdg phase

the same fetch_secret() function is duplicated to ensure that we are not
passing secret data around as input/output parameters/artifacts. Doing
the latter would result in user secrets being stored in mlmd/object
store, which we should avoid. In the future this logic will be replaced
with kfp's built-in secret mounting once it supports parameterization;
at that point a lot of the duplicated logic will be removed.
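
For context, the kfp-kubernetes helper this would eventually converge on looks roughly like the sketch below. It is not part of this commit; `eval-judge-secret` is a hypothetical secret name, and at the time of writing the helper cannot take that name from a pipeline parameter, which is the gap fetch_secret() fills:

```python
# Sketch only: the built-in secret mounting referenced above.
# `run_mt_bench_task` is the eval task created in pipeline.py; the
# secret name here must be a compile-time constant rather than a
# pipeline parameter, hence the REST-based workaround below.
from kfp import kubernetes

kubernetes.use_secret_as_env(
    run_mt_bench_task,
    secret_name="eval-judge-secret",  # hypothetical name
    secret_key_to_env={
        "JUDGE_API_KEY": "JUDGE_API_KEY",
        "JUDGE_NAME": "JUDGE_NAME",
        "JUDGE_ENDPOINT": "JUDGE_ENDPOINT",
    },
)
```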

We also perform REST requests against the host cluster because access to
the kubernetes python package is not guaranteed.
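
The components therefore expect the secrets to already exist in the pipeline's namespace. Purely as an illustration of their shape, here is a minimal sketch that creates a judge secret through the same in-cluster REST API the components read from; every name and value is a placeholder except the three keys, which match what the eval code reads (in practice you would create this with kubectl or oc):

```python
# Sketch only: create the judge Secret the eval components will fetch.
# Assumes it runs in-cluster with a service account allowed to create
# secrets; all names/values below are placeholders.
import requests

API = "https://kubernetes.default.svc"
SA = "/var/run/secrets/kubernetes.io/serviceaccount"

with open(f"{SA}/namespace") as f:
    namespace = f.read().strip()
with open(f"{SA}/token") as f:
    token = f.read().strip()

secret = {
    "apiVersion": "v1",
    "kind": "Secret",
    "metadata": {"name": "eval-judge-secret"},  # hypothetical name
    # stringData lets the API server handle the base64 encoding that
    # fetch_secret() later reverses when it reads `.data`
    "stringData": {
        "JUDGE_API_KEY": "placeholder-api-key",
        "JUDGE_NAME": "placeholder-judge-model",
        "JUDGE_ENDPOINT": "https://judge.example.com/v1",
    },
}

resp = requests.post(
    f"{API}/api/v1/namespaces/{namespace}/secrets",
    json=secret,
    headers={"Authorization": f"Bearer {token}"},
    verify=f"{SA}/ca.crt",  # in-cluster CA bundle
)
resp.raise_for_status()
```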

Signed-off-by: Humair Khan <HumairAK@users.noreply.github.com>
HumairAK committed Feb 26, 2025
1 parent 1bf0b73 commit 10c1cf5
Showing 8 changed files with 323 additions and 112 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -211,7 +211,7 @@ To collaborate on this repository, please follow these steps:
source .venv/bin/activate
```

## Adding/Updating dependencies

When updating python package dependencies in `pyproject.toml`, regenerate [requirements.txt](requirements.txt):

@@ -225,7 +225,7 @@ For this you need [pybuild-deps](https://pybuild-deps.readthedocs.io/en/latest/u
Temporarily remove `kfp-pipeline-spec` from `requirements.txt`, then run:

```bash
pybuild-deps compile requirements.txt -o requirements-build.txt
```

> Note that we do this because `kfp-pipeline-spec` only includes wheels and not the sources, which breaks
53 changes: 47 additions & 6 deletions eval/final.py
@@ -20,21 +20,22 @@ def run_final_eval_op(
    sdg_path: str = "/input/sdg",
    mmlu_branch_output_path: str = "/output/mmlu_branch",
    mt_bench_branch_output_path: str = "/output/mt_bench_branch",
    judge_secret_name: str = None,
):
    import base64
    import json
    import os
    import subprocess
    from pathlib import Path

    import httpx
    import requests
    import torch
    from instructlab.eval.mmlu import MMLUBranchEvaluator
    from instructlab.eval.mt_bench import MTBenchBranchEvaluator
    from instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score
    from kubernetes import client, config

    judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
    use_tls = os.path.exists(judge_ca_cert_path) and (
        os.path.getsize(judge_ca_cert_path) > 0
@@ -341,9 +342,49 @@ def find_node_dataset_directories(base_dir: str):

print("Starting MT_BENCH_BRANCH ...")

    def fetch_secret(secret_name, keys):
        # Kubernetes API server inside the cluster
        K8S_API_SERVER = "https://kubernetes.default.svc"
        NAMESPACE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
        TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"

        # Fetch namespace
        try:
            with open(NAMESPACE_PATH, "r") as f:
                namespace = f.read().strip()
        except FileNotFoundError:
            raise RuntimeError("Error reading namespace")

        # Fetch service account token
        try:
            with open(TOKEN_PATH, "r") as f:
                token = f.read().strip()
        except FileNotFoundError:
            raise RuntimeError("Error reading service account token")

        headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
        verify_tls = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
        url = f"{K8S_API_SERVER}/api/v1/namespaces/{namespace}/secrets/{secret_name}"
        response = requests.get(url, headers=headers, verify=verify_tls)

        if response.status_code == 200:
            secret_data = response.json().get("data", {})
            return [base64.b64decode(secret_data[key]).decode() for key in keys]
        else:
            raise RuntimeError(
                f"Error fetching secret: {response.status_code} {response.text}"
            )

    if judge_secret_name is None:
        judge_api_key = os.getenv("JUDGE_API_KEY", "")
        judge_model_name = os.getenv("JUDGE_NAME")
        judge_endpoint = os.getenv("JUDGE_ENDPOINT")
    else:
        print("Eval Judge secret specified, fetching...")
        judge_api_key, judge_model_name, judge_endpoint = fetch_secret(
            judge_secret_name, ["JUDGE_API_KEY", "JUDGE_NAME", "JUDGE_ENDPOINT"]
        )
        print("Eval Judge secret data retrieved.")

    output_dir = "/tmp/eval_output"

51 changes: 48 additions & 3 deletions eval/mt_bench.py
@@ -17,18 +17,63 @@ def run_mt_bench_op(
    max_workers: str,
    models_folder: str,
    output_path: str = "/output/mt_bench_data.json",
    judge_secret_name: str = None,
) -> NamedTuple("outputs", best_model=str, best_score=float):
    import base64
    import json
    import os
    import subprocess

    import httpx
    import requests
    import torch
    from instructlab.eval.mt_bench import MTBenchEvaluator
    from kubernetes import client, config

    def fetch_secret(secret_name, keys):
        # Kubernetes API server inside the cluster
        K8S_API_SERVER = "https://kubernetes.default.svc"
        NAMESPACE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
        TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"

        # Fetch namespace
        try:
            with open(NAMESPACE_PATH, "r") as f:
                namespace = f.read().strip()
        except FileNotFoundError:
            raise RuntimeError("Error reading namespace")

        # Fetch service account token
        try:
            with open(TOKEN_PATH, "r") as f:
                token = f.read().strip()
        except FileNotFoundError:
            raise RuntimeError("Error reading service account token")

        headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
        verify_tls = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
        url = f"{K8S_API_SERVER}/api/v1/namespaces/{namespace}/secrets/{secret_name}"
        response = requests.get(url, headers=headers, verify=verify_tls)

        if response.status_code == 200:
            secret_data = response.json().get("data", {})
            return [base64.b64decode(secret_data[key]).decode() for key in keys]
        else:
            raise RuntimeError(
                f"Error fetching secret: {response.status_code} {response.text}"
            )

    if judge_secret_name is None:
        judge_api_key = os.getenv("JUDGE_API_KEY", "")
        judge_model_name = os.getenv("JUDGE_NAME")
        judge_endpoint = os.getenv("JUDGE_ENDPOINT")
    else:
        print("Eval Judge secret specified, fetching...")
        judge_api_key, judge_model_name, judge_endpoint = fetch_secret(
            judge_secret_name, ["JUDGE_API_KEY", "JUDGE_NAME", "JUDGE_ENDPOINT"]
        )
        print("Eval Judge secret data retrieved.")

    judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
    use_tls = os.path.exists(judge_ca_cert_path) and (
        os.path.getsize(judge_ca_cert_path) > 0
3 changes: 3 additions & 0 deletions pipeline.py
@@ -213,6 +213,7 @@ def ilab_pipeline(
        repo_branch=sdg_repo_branch,
        repo_pr=sdg_repo_pr,
        sdg_sampling_size=sdg_sample_size,
        sdg_secret_name=sdg_teacher_secret,
    )
    sdg_task.set_env_variable("HOME", "/tmp")
    sdg_task.set_env_variable("HF_HOME", "/tmp")
@@ -380,6 +381,7 @@ def ilab_pipeline(
        models_folder="/output/phase_2/model/hf_format",
        max_workers=mt_bench_max_workers,
        merge_system_user_message=mt_bench_merge_system_user_message,
        judge_secret_name=eval_judge_secret,
    )
    mount_pvc(
        task=run_mt_bench_task,
@@ -420,6 +422,7 @@ def ilab_pipeline(
        merge_system_user_message=final_eval_merge_system_user_message,
        few_shots=final_eval_few_shots,
        batch_size=final_eval_batch_size,
        judge_secret_name=eval_judge_secret,
    )
    mount_pvc(
        task=final_eval_task, pvc_name=output_pvc_task.output, mount_path="/output"
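
Downstream, the new secret names are ordinary pipeline parameters. Here is a hedged sketch of supplying them at submission time; the host URL, package file, and secret names are placeholders, while the parameter names come from pipeline.py above:

```python
# Sketch only: submit the compiled pipeline with the new parameters.
import kfp

client = kfp.Client(host="https://my-kfp-endpoint")  # placeholder host
client.create_run_from_pipeline_package(
    "pipeline.yaml",  # the compiled ilab pipeline
    arguments={
        "sdg_teacher_secret": "teacher-server-secret",  # placeholder
        "eval_judge_secret": "judge-server-secret",  # placeholder
    },
)
```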
