diff --git a/pipeline.yaml b/pipeline.yaml index 3c9d3a7..73324f2 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -4,14 +4,12 @@ # Inputs: # base_model: str [Default: 'ibm-granite/granite-7b-base'] # batch_size: str [Default: 'auto'] -# device: str # effective_batch_size: int [Default: 3840.0] # few_shots: int [Default: 5.0] # learning_rate: float [Default: 0.0001] # max_batch_len: int [Default: 20000.0] # max_workers: str [Default: 'auto'] # merge_system_user_message: bool [Default: False] -# model_dtype: str [Default: 'bfloat16'] # nnodes: int [Default: 2.0] # nproc_per_node: int [Default: 3.0] # num_epochs_phase_1: int [Default: 2.0] @@ -492,16 +490,12 @@ components: candidate_model: isOptional: true parameterType: STRING - device: - parameterType: STRING few_shots: parameterType: NUMBER_INTEGER max_workers: parameterType: STRING merge_system_user_message: parameterType: BOOLEAN - model_dtype: - parameterType: STRING sdg_path: defaultValue: /input/sdg isOptional: true @@ -527,9 +521,6 @@ components: best_score_file: isOptional: true parameterType: STRING - device: - isOptional: true - parameterType: STRING max_workers: parameterType: STRING merge_system_user_message: @@ -637,7 +628,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ \ python3 -m pip install --quiet --no-warn-script-location 'instructlab-training@git+https://github.com/instructlab/training.git'\ \ && \"$0\" \"$@\"\n" @@ -732,7 +723,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ \ python3 -m pip install --quiet --no-warn-script-location 'huggingface_hub'\ \ && \"$0\" \"$@\"\n" @@ -805,7 +796,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -822,7 +813,7 @@ deploymentSpec: \ *\n\ndef list_models_in_directory_op(models_folder: str) -> List[str]:\n\ \ import os\n\n models = os.listdir(models_folder)\n return models\n\ \n" - image: python:3.8 + image: python:3.9 exec-pvc-to-model-op: container: args: @@ -851,7 +842,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -1008,7 +999,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -1165,7 +1156,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -1182,20 +1173,19 @@ deploymentSpec: \ *\n\ndef run_final_eval_op(\n mmlu_branch_output: Output[Artifact],\n\ \ mt_bench_branch_output: Output[Artifact],\n base_model_dir: str,\n\ \ base_branch: str,\n candidate_branch: str,\n max_workers: str,\n\ - \ device: str,\n model_dtype: str,\n few_shots: int,\n batch_size:\ - \ str,\n merge_system_user_message: bool,\n candidate_model: str =\ - \ None,\n taxonomy_path: str = \"/input/taxonomy\",\n sdg_path: str\ - \ = \"/input/sdg\",\n):\n import json\n import os\n import subprocess\n\ - \n import torch\n from instructlab.eval.mmlu import MMLU_TASKS, MMLUBranchEvaluator\n\ - \ from instructlab.eval.mt_bench import MTBenchBranchEvaluator\n from\ - \ instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score\n\ - \n if judge_ca_cert := os.getenv(\"JUDGE_CA_CERT_PATH\"):\n import\ - \ httpx\n import openai\n\n # Create a custom HTTP client\n\ - \ class CustomHttpClient(httpx.Client):\n def __init__(self,\ - \ *args, **kwargs):\n # Use the custom CA certificate\n \ - \ kwargs.setdefault(\"verify\", judge_ca_cert)\n \ - \ super().__init__(*args, **kwargs)\n\n # Create a new OpenAI\ - \ class that uses the custom HTTP client\n class CustomOpenAI(openai.OpenAI):\n\ + \ few_shots: int,\n batch_size: str,\n merge_system_user_message:\ + \ bool,\n candidate_model: str = None,\n taxonomy_path: str = \"/input/taxonomy\"\ + ,\n sdg_path: str = \"/input/sdg\",\n):\n import json\n import\ + \ os\n import subprocess\n\n import torch\n from instructlab.eval.mmlu\ + \ import MMLUBranchEvaluator\n from instructlab.eval.mt_bench import\ + \ MTBenchBranchEvaluator\n from instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores,\ + \ sort_score\n\n if judge_ca_cert := os.getenv(\"JUDGE_CA_CERT_PATH\"\ + ):\n import httpx\n import openai\n\n # Create a custom\ + \ HTTP client\n class CustomHttpClient(httpx.Client):\n \ + \ def __init__(self, *args, **kwargs):\n # Use the custom\ + \ CA certificate\n kwargs.setdefault(\"verify\", judge_ca_cert)\n\ + \ super().__init__(*args, **kwargs)\n\n # Create a\ + \ new OpenAI class that uses the custom HTTP client\n class CustomOpenAI(openai.OpenAI):\n\ \ def __init__(self, *args, **kwargs):\n custom_client\ \ = CustomHttpClient()\n super().__init__(http_client=custom_client,\ \ *args, **kwargs)\n\n # Monkey patch the OpenAI class in the openai\ @@ -1214,11 +1204,11 @@ deploymentSpec: \ ]\n\n process = subprocess.Popen(args=command)\n\n \ \ print(f\"Waiting for vLLM server to start at {vllm_server}...\"\ )\n\n for attempt in range(retries):\n try:\n \ - \ response = requests.get(f\"{vllm_server}/models\")\n \ - \ if response.status_code == 200:\n 
print(f\"vLLM\ - \ server is up and running at {vllm_server}.\")\n return\ - \ process, vllm_server\n except requests.ConnectionError:\n \ - \ pass\n\n print(\n f\"Server not\ + \ response = requests.get(f\"{vllm_server}/models\", timeout=10)\n\ + \ if response.status_code == 200:\n print(f\"\ + vLLM server is up and running at {vllm_server}.\")\n \ + \ return process, vllm_server\n except requests.ConnectionError:\n\ + \ pass\n\n print(\n f\"Server not\ \ available yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\ \n )\n time.sleep(delay)\n\n raise RuntimeError(\n\ \ f\"Failed to start vLLM server at {vllm_server} after {retries}\ @@ -1232,10 +1222,7 @@ deploymentSpec: )\n\n except subprocess.TimeoutExpired:\n print(\n \ \ f\"Timeout expired. Forcefully killing vLLM server with PID:\ \ {process.pid}\"\n )\n process.kill() # Force kill\ - \ the process if over timeout\n except subprocess.NoSuchProcess:\n\ - \ print(f\"Process with PID {process.pid} no longer exists.\"\ - )\n except Exception as e:\n print(f\"Failed to stop process\ - \ with PID {process.pid}. Error: {e}\")\n # Note from instructlab/model/backends/vllm.py\n\ + \ the process if over timeout\n\n # Note from instructlab/model/backends/vllm.py\n\ \ # vLLM relies on stable VRAM, residual reclamation activity\n\ \ # can lead to crashes on restart. To prevent this add a\n \ \ # short delay (typically ~ 10 seconds, max 30) to verify stability.\n\ @@ -1268,9 +1255,9 @@ deploymentSpec: \ task, avg_score = entry\n summary[\"no_changes\"\ ].append(\n {\"task\": task, \"average_score\": round(avg_score,\ \ 2)}\n )\n\n if new is not None and len(new) > 0:\n\ - \ for entry in new:\n na, avg_score = entry\n\ - \ summary[\"new\"].append(\n {\"qna\"\ - : qna, \"average_score\": round(avg_score, 2)}\n )\n\n \ + \ for entry in new:\n _, avg_score = entry\n \ + \ summary[\"new\"].append(\n {\"qna\":\ + \ qna, \"average_score\": round(avg_score, 2)}\n )\n\n \ \ return json.dumps(summary, indent=4)\n\n ######################################################################\n\ \ print(\"Checking GPUs...\")\n gpu_available = torch.cuda.is_available()\n\ \ gpu_name = (\n torch.cuda.get_device_name(torch.cuda.current_device())\n\ @@ -1281,12 +1268,12 @@ deploymentSpec: \n # This is very specific to 'ilab generate', necessary because the\ \ data generation and\n # model evaluation are taking place in separate\ \ environments.\n def update_test_lines_in_files(base_dir):\n \ - \ import os\n\n import yaml\n\n for root, dirs, files in os.walk(base_dir):\n\ + \ import os\n\n import yaml\n\n for root, _, files in os.walk(base_dir):\n\ \ for file_name in files:\n if file_name.startswith(\"\ knowledge_\") and file_name.endswith(\n \"_task.yaml\"\ \n ):\n file_path = os.path.join(root,\ - \ file_name)\n\n with open(file_path, \"r\") as file:\n\ - \ task_yaml = yaml.load(file, Loader=yaml.Loader)\n\ + \ file_name)\n\n with open(file_path, \"r\", encoding=\"\ + utf-8\") as file:\n task_yaml = yaml.load(file, Loader=yaml.Loader)\n\ \n current_test_file_path = task_yaml[\"dataset_kwargs\"\ ][\"data_files\"][\n \"test\"\n \ \ ]\n current_test_file_path_parts = current_test_file_path.split(\"\ @@ -1298,7 +1285,7 @@ deploymentSpec: \ to find sdg output node_datasets_*\n def find_node_dataset_directories(base_dir:\ \ str):\n import os\n import re\n\n # This is specific\ \ to ilab/eval output\n pattern = r\"node_datasets_\"\n matching_dirs\ - \ = []\n regex = re.compile(pattern)\n\n for root, dirs, files\ + \ = []\n regex = re.compile(pattern)\n\n 
for root, dirs, _\ \ in os.walk(base_dir):\n for directory in dirs:\n \ \ if regex.search(directory):\n matching_dirs.append(os.path.join(root,\ \ directory))\n\n # From 'ilab sdg' the knowledge_*_task.yaml files\ @@ -1344,16 +1331,16 @@ deploymentSpec: : candidate_model,\n \"model_score\": round(overall_score, 2),\n\ \ \"base_model\": base_model_dir,\n \"base_model_score\"\ : round(base_overall_score, 2),\n \"summary\": summary,\n \ - \ }\n\n with open(mmlu_branch_output.path, \"w\") as f:\n \ - \ json.dump(mmlu_branch_data, f, indent=4)\n else:\n print(\"\ - No MMLU tasks directories found, skipping MMLU_branch evaluation.\")\n\n\ - \ # MT_BENCH_BRANCH\n\n print(\"Starting MT_BENCH_BRANCH ...\")\n\n\ - \ judge_api_key = os.getenv(\"JUDGE_API_KEY\", \"\")\n judge_model_name\ - \ = os.getenv(\"JUDGE_NAME\")\n judge_endpoint = os.getenv(\"JUDGE_ENDPOINT\"\ - )\n\n output_dir = \"/tmp/eval_output\"\n\n # TODO: candidate_branch\ - \ must be in same repo, not a fork, or, can compare main branch against\ - \ candidate, base models\n base_branch = base_branch or \"main\"\n \ - \ candidate_branch = candidate_branch or \"main\"\n\n ######################################################################\n\ + \ }\n\n with open(mmlu_branch_output.path, \"w\", encoding=\"\ + utf-8\") as f:\n json.dump(mmlu_branch_data, f, indent=4)\n \ + \ else:\n print(\"No MMLU tasks directories found, skipping MMLU_branch\ + \ evaluation.\")\n\n # MT_BENCH_BRANCH\n\n print(\"Starting MT_BENCH_BRANCH\ + \ ...\")\n\n judge_api_key = os.getenv(\"JUDGE_API_KEY\", \"\")\n \ + \ judge_model_name = os.getenv(\"JUDGE_NAME\")\n judge_endpoint = os.getenv(\"\ + JUDGE_ENDPOINT\")\n\n output_dir = \"/tmp/eval_output\"\n\n # TODO:\ + \ candidate_branch must be in same repo, not a fork, or, can compare main\ + \ branch against candidate, base models\n base_branch = base_branch or\ + \ \"main\"\n candidate_branch = candidate_branch or \"main\"\n\n ######################################################################\n\ \ # TODO: Update ilab/model/evaluate evaluate def logic to allow for\ \ external judge model\n # and when that happens, much of this logic\ \ can be imported from the 'evaluate' definition:\n # https://github.com/instructlab/instructlab/blob/83ca501ecdd858677380046e2a56da5b2f3f14e7/src/instructlab/model/evaluate.py#L504\n\ @@ -1371,8 +1358,8 @@ deploymentSpec: \ is calculated based on environment\n # https://github.com/instructlab/eval/blob/main/src/instructlab/eval/mt_bench.py#L36\n\ \ if max_workers == \"auto\":\n try:\n usable_cpu_count\ \ = len(os.sched_getaffinity(0)) // 2\n except AttributeError:\n\ - \ usable_cpu_count = multiprocessing.cpu_count() // 2\n \ - \ max_workers = usable_cpu_count\n\n branches = [candidate_branch,\ + \ import multiprocessing\n\n usable_cpu_count = multiprocessing.cpu_count()\ + \ // 2\n max_workers = usable_cpu_count\n\n branches = [candidate_branch,\ \ base_branch]\n m_paths = [candidate_model, base_model_dir]\n qa_pairs_and_errors\ \ = []\n for i, evaluator in enumerate(mt_bench_evaluators):\n \ \ branch = branches[i]\n m_path = m_paths[i]\n\n print(\n\ @@ -1411,8 +1398,8 @@ deploymentSpec: \ \"judge_model\": judge_model_name,\n \"max_score\": \"10.0\"\ ,\n \"overall_score\": overall_score,\n \"base_overall_score\"\ : base_overall_score,\n \"error_rate\": error_rate,\n \"summary\"\ - : summary,\n }\n\n with open(mt_bench_branch_output.path, \"w\") as\ - \ f:\n json.dump(mt_bench_branch_data, f, indent=4)\n\n" + : summary,\n }\n\n with open(mt_bench_branch_output.path, \"w\", 
encoding=\"\ + utf-8\") as f:\n json.dump(mt_bench_branch_data, f, indent=4)\n\n" env: - name: HOME value: /tmp @@ -1421,8 +1408,8 @@ deploymentSpec: image: quay.io/redhat-et/ilab:1.2 resources: accelerator: - count: '1' - type: nvidia.com/gpu + resourceCount: '1' + resourceType: nvidia.com/gpu exec-run-mt-bench-op: container: args: @@ -1435,7 +1422,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -1455,41 +1442,40 @@ deploymentSpec: \ is calculated based on environment\n # https://github.com/instructlab/eval/blob/main/src/instructlab/eval/mt_bench.py#L36\n\ \ max_workers: str,\n output_path: str = \"/output/mt_bench_data.json\"\ ,\n models_list: List[str] = None,\n models_folder: Optional[str]\ - \ = None,\n device: str = None,\n best_score_file: Optional[str] =\ - \ None,\n) -> NamedTuple(\"outputs\", best_model=str, best_score=float):\n\ - \ import json\n import os\n import subprocess\n\n import torch\n\ - \ from instructlab.eval.mt_bench import MTBenchEvaluator\n\n if judge_ca_cert\ - \ := os.getenv(\"JUDGE_CA_CERT_PATH\"):\n import httpx\n import\ - \ openai\n\n # Create a custom HTTP client\n class CustomHttpClient(httpx.Client):\n\ - \ def __init__(self, *args, **kwargs):\n # Use\ - \ the custom CA certificate\n kwargs.setdefault(\"verify\"\ - , judge_ca_cert)\n super().__init__(*args, **kwargs)\n\n\ - \ # Create a new OpenAI class that uses the custom HTTP client\n\ - \ class CustomOpenAI(openai.OpenAI):\n def __init__(self,\ - \ *args, **kwargs):\n custom_client = CustomHttpClient()\n\ - \ super().__init__(http_client=custom_client, *args, **kwargs)\n\ - \n # Monkey patch the OpenAI class in the openai module, so that\ - \ the eval lib can use it\n openai.OpenAI = CustomOpenAI\n\n def\ - \ launch_vllm(\n model_path: str, gpu_count: int, retries: int =\ - \ 120, delay: int = 10\n ) -> tuple:\n import subprocess\n \ - \ import sys\n import time\n\n import requests\n \ - \ from instructlab.model.backends.common import free_tcp_ipv4_port\n\n\ - \ free_port = free_tcp_ipv4_port(\"127.0.0.1\")\n port = str(free_port)\n\ - \ vllm_server = f\"http://127.0.0.1:{port}/v1\"\n\n command\ - \ = [\n sys.executable,\n \"-m\",\n \"\ - vllm.entrypoints.openai.api_server\",\n \"--port\",\n \ - \ port,\n \"--model\",\n model_path,\n \ - \ ]\n if gpu_count > 0:\n command += [\n \ - \ \"--tensor-parallel-size\",\n str(gpu_count),\n \ - \ ]\n\n process = subprocess.Popen(args=command)\n\n \ - \ print(f\"Waiting for vLLM server to start at {vllm_server}...\")\n\n\ - \ for attempt in range(retries):\n try:\n \ - \ response = requests.get(f\"{vllm_server}/models\")\n \ - \ if response.status_code == 200:\n print(f\"vLLM server\ - \ is up and running at {vllm_server}.\")\n return process,\ - \ vllm_server\n except requests.ConnectionError:\n \ - \ pass\n\n print(\n f\"Server not available\ - \ yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\ + \ = None,\n best_score_file: Optional[str] = None,\n) -> NamedTuple(\"\ + outputs\", best_model=str, best_score=float):\n import json\n import\ + \ os\n import subprocess\n\n import torch\n from instructlab.eval.mt_bench\ + \ import 
MTBenchEvaluator\n\n if judge_ca_cert := os.getenv(\"JUDGE_CA_CERT_PATH\"\ + ):\n import httpx\n import openai\n\n # Create a custom\ + \ HTTP client\n class CustomHttpClient(httpx.Client):\n \ + \ def __init__(self, *args, **kwargs):\n # Use the custom\ + \ CA certificate\n kwargs.setdefault(\"verify\", judge_ca_cert)\n\ + \ super().__init__(*args, **kwargs)\n\n # Create a\ + \ new OpenAI class that uses the custom HTTP client\n class CustomOpenAI(openai.OpenAI):\n\ + \ def __init__(self, *args, **kwargs):\n custom_client\ + \ = CustomHttpClient()\n super().__init__(http_client=custom_client,\ + \ *args, **kwargs)\n\n # Monkey patch the OpenAI class in the openai\ + \ module, so that the eval lib can use it\n openai.OpenAI = CustomOpenAI\n\ + \n def launch_vllm(\n model_path: str, gpu_count: int, retries:\ + \ int = 120, delay: int = 10\n ) -> tuple:\n import subprocess\n\ + \ import sys\n import time\n\n import requests\n \ + \ from instructlab.model.backends.common import free_tcp_ipv4_port\n\ + \n free_port = free_tcp_ipv4_port(\"127.0.0.1\")\n port =\ + \ str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\n\ + \n command = [\n sys.executable,\n \"-m\",\n\ + \ \"vllm.entrypoints.openai.api_server\",\n \"--port\"\ + ,\n port,\n \"--model\",\n model_path,\n\ + \ ]\n if gpu_count > 0:\n command += [\n \ + \ \"--tensor-parallel-size\",\n str(gpu_count),\n\ + \ ]\n\n process = subprocess.Popen(args=command)\n\n \ + \ print(f\"Waiting for vLLM server to start at {vllm_server}...\"\ + )\n\n for attempt in range(retries):\n try:\n \ + \ response = requests.get(f\"{vllm_server}/models\")\n \ + \ if response.status_code == 200:\n print(f\"vLLM\ + \ server is up and running at {vllm_server}.\")\n return\ + \ process, vllm_server\n except requests.ConnectionError:\n \ + \ pass\n\n print(\n f\"Server not\ + \ available yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\ \n )\n time.sleep(delay)\n\n raise RuntimeError(\n\ \ f\"Failed to start vLLM server at {vllm_server} after {retries}\ \ retries.\"\n )\n\n def shutdown_vllm(process: subprocess.Popen,\ @@ -1502,20 +1488,19 @@ deploymentSpec: )\n\n except subprocess.TimeoutExpired:\n print(\n \ \ f\"Timeout expired. Forcefully killing vLLM server with PID:\ \ {process.pid}\"\n )\n process.kill() # Force kill\ - \ the process if over timeout\n except subprocess.NoSuchProcess:\n\ - \ print(f\"Process with PID {process.pid} no longer exists.\"\ - )\n except Exception as e:\n print(f\"Failed to stop process\ - \ with PID {process.pid}. Error: {e}\")\n # Note from instructlab/model/backends/vllm.py\n\ - \ # vLLM relies on stable VRAM, residual reclamation activity\n\ - \ # can lead to crashes on restart. To prevent this add a\n \ - \ # short delay (typically ~ 10 seconds, max 30) to verify stability.\n\ - \ wait_for_stable_vram(30)\n\n gpu_available = torch.cuda.is_available()\n\ - \ gpu_name = (\n torch.cuda.get_device_name(torch.cuda.current_device())\n\ - \ if gpu_available\n else \"No GPU available\"\n )\n \ - \ gpu_count = torch.cuda.device_count() if gpu_available else 0\n\n \ - \ print(f\"GPU Available: {gpu_available}, {gpu_name}\")\n\n if models_list\ - \ is None and models_folder:\n models_list = os.listdir(models_folder)\n\ - \n judge_api_key = os.getenv(\"JUDGE_API_KEY\", \"\")\n judge_model_name\ + \ the process if over timeout\n except Exception as e:\n \ + \ print(f\"Failed to stop process with PID {process.pid}. 
Error: {e}\"\ + )\n # Note from instructlab/model/backends/vllm.py\n # vLLM\ + \ relies on stable VRAM, residual reclamation activity\n # can lead\ + \ to crashes on restart. To prevent this add a\n # short delay (typically\ + \ ~ 10 seconds, max 30) to verify stability.\n wait_for_stable_vram(30)\n\ + \n gpu_available = torch.cuda.is_available()\n gpu_name = (\n \ + \ torch.cuda.get_device_name(torch.cuda.current_device())\n if\ + \ gpu_available\n else \"No GPU available\"\n )\n gpu_count\ + \ = torch.cuda.device_count() if gpu_available else 0\n\n print(f\"GPU\ + \ Available: {gpu_available}, {gpu_name}\")\n\n if models_list is None\ + \ and models_folder:\n models_list = os.listdir(models_folder)\n\n\ + \ judge_api_key = os.getenv(\"JUDGE_API_KEY\", \"\")\n judge_model_name\ \ = os.getenv(\"JUDGE_NAME\")\n judge_endpoint = os.getenv(\"JUDGE_ENDPOINT\"\ )\n\n scores = {}\n all_mt_bench_data = []\n\n # generate_answers,judgment\ \ uses a magic word for its mt_bench evaluator - 'auto'\n # with 'auto',\ @@ -1523,15 +1508,15 @@ deploymentSpec: \ # https://github.com/instructlab/eval/blob/main/src/instructlab/eval/mt_bench.py#L36\n\ \ if max_workers == \"auto\":\n try:\n usable_cpu_count\ \ = len(os.sched_getaffinity(0)) // 2\n except AttributeError:\n\ - \ usable_cpu_count = multiprocessing.cpu_count() // 2\n \ - \ max_workers = usable_cpu_count\n\n # modify model_list to ignore\ - \ any jsonl files present in the directory\n models_list = [model for\ - \ model in models_list if not model.endswith(\".jsonl\")]\n for model_name\ - \ in models_list:\n print(f\"Serving candidate model: {model_name}\"\ - )\n model_path = f\"{models_path_prefix}/{model_name}\"\n\n \ - \ vllm_process, vllm_server = launch_vllm(model_path, gpu_count)\n\n \ - \ # model ID is the model_path value in vLLM\n evaluator =\ - \ MTBenchEvaluator(\n model_name=model_path,\n judge_model_name=judge_model_name,\n\ + \ import multiprocessing\n\n usable_cpu_count = multiprocessing.cpu_count()\ + \ // 2\n max_workers = usable_cpu_count\n\n # modify model_list\ + \ to ignore any jsonl files present in the directory\n models_list =\ + \ [model for model in models_list if not model.endswith(\".jsonl\")]\n \ + \ for model_name in models_list:\n print(f\"Serving candidate model:\ + \ {model_name}\")\n model_path = f\"{models_path_prefix}/{model_name}\"\ + \n\n vllm_process, vllm_server = launch_vllm(model_path, gpu_count)\n\ + \n # model ID is the model_path value in vLLM\n evaluator\ + \ = MTBenchEvaluator(\n model_name=model_path,\n judge_model_name=judge_model_name,\n\ \ output_dir=\"/tmp/eval_output\",\n merge_system_user_message=merge_system_user_message,\n\ \ )\n\n evaluator.gen_answers(\n server_url=vllm_server,\n\ \ serving_gpus=gpu_count,\n max_workers=max_workers,\n\ @@ -1567,8 +1552,8 @@ deploymentSpec: image: quay.io/redhat-et/ilab:1.2 resources: accelerator: - count: '1' - type: nvidia.com/gpu + resourceCount: '1' + resourceType: nvidia.com/gpu exec-sdg-op: container: args: @@ -1581,7 +1566,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -2057,16 +2042,12 @@ root: candidate_model: runtimeValue: constant: /output/phase_2/model/hf_format/candidate_model - device: - componentInputParameter: device few_shots: componentInputParameter: few_shots max_workers: componentInputParameter: max_workers merge_system_user_message: componentInputParameter: merge_system_user_message - model_dtype: - componentInputParameter: model_dtype taskInfo: name: run-final-eval-op run-mt-bench-op: @@ -2078,8 +2059,6 @@ root: - list-models-in-directory-op inputs: parameters: - device: - componentInputParameter: device max_workers: componentInputParameter: max_workers merge_system_user_message: @@ -2153,9 +2132,6 @@ root: defaultValue: auto isOptional: true parameterType: STRING - device: - isOptional: true - parameterType: STRING effective_batch_size: defaultValue: 3840.0 isOptional: true @@ -2180,10 +2156,6 @@ root: defaultValue: false isOptional: true parameterType: BOOLEAN - model_dtype: - defaultValue: bfloat16 - isOptional: true - parameterType: STRING nnodes: defaultValue: 2.0 isOptional: true @@ -2235,7 +2207,7 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.9.0 +sdkVersion: kfp-2.10.0 --- platforms: kubernetes: diff --git a/standalone/standalone.py b/standalone/standalone.py index e366929..1e7da96 100755 --- a/standalone/standalone.py +++ b/standalone/standalone.py @@ -1515,7 +1515,6 @@ def run_mt_bench_op( output_path: str = "/output/mt_bench_data.json", models_list: List[str] = None, models_folder: Optional[str] = None, - device: str = None, best_score_file: Optional[str] = None, ) -> NamedTuple("outputs", best_model=str, best_score=float): import json @@ -1616,8 +1615,6 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20): f"Timeout expired. Forcefully killing vLLM server with PID: {process.pid}" ) process.kill() # Force kill the process if over timeout - except subprocess.NoSuchProcess: - print(f"Process with PID {process.pid} no longer exists.") except Exception as e: print(f"Failed to stop process with PID {process.pid}. 
Error: {e}") # Note from instructlab/model/backends/vllm.py @@ -1653,6 +1650,8 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20): try: usable_cpu_count = len(os.sched_getaffinity(0)) // 2 except AttributeError: + import multiprocessing + usable_cpu_count = multiprocessing.cpu_count() // 2 max_workers = usable_cpu_count @@ -1735,8 +1734,6 @@ def run_final_eval_op( base_branch: str, candidate_branch: str, max_workers: str, - device: str, - model_dtype: str, few_shots: int, batch_size: str, merge_system_user_message: bool, @@ -1749,7 +1746,7 @@ def run_final_eval_op( import subprocess import torch - from instructlab.eval.mmlu import MMLU_TASKS, MMLUBranchEvaluator + from instructlab.eval.mmlu import MMLUBranchEvaluator from instructlab.eval.mt_bench import MTBenchBranchEvaluator from instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score @@ -1810,7 +1807,7 @@ def launch_vllm( for attempt in range(retries): try: - response = requests.get(f"{vllm_server}/models") + response = requests.get(f"{vllm_server}/models", timeout=10) if response.status_code == 200: print(f"vLLM server is up and running at {vllm_server}.") return process, vllm_server @@ -1846,10 +1843,7 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20): f"Timeout expired. Forcefully killing vLLM server with PID: {process.pid}" ) process.kill() # Force kill the process if over timeout - except subprocess.NoSuchProcess: - print(f"Process with PID {process.pid} no longer exists.") - except Exception as e: - print(f"Failed to stop process with PID {process.pid}. Error: {e}") + # Note from instructlab/model/backends/vllm.py # vLLM relies on stable VRAM, residual reclamation activity # can lead to crashes on restart. To prevent this add a @@ -1911,7 +1905,7 @@ def branch_eval_summary_to_json( if new is not None and len(new) > 0: for entry in new: - na, avg_score = entry + _, avg_score = entry summary["new"].append( {"qna": qna, "average_score": round(avg_score, 2)} ) @@ -1942,14 +1936,14 @@ def update_test_lines_in_files(base_dir): import yaml - for root, dirs, files in os.walk(base_dir): + for root, _, files in os.walk(base_dir): for file_name in files: if file_name.startswith("knowledge_") and file_name.endswith( "_task.yaml" ): file_path = os.path.join(root, file_name) - with open(file_path, "r") as file: + with open(file_path, "r", encoding="utf-8") as file: task_yaml = yaml.load(file, Loader=yaml.Loader) current_test_file_path = task_yaml["dataset_kwargs"]["data_files"][ @@ -1973,7 +1967,7 @@ def find_node_dataset_directories(base_dir: str): matching_dirs = [] regex = re.compile(pattern) - for root, dirs, files in os.walk(base_dir): + for root, dirs, _ in os.walk(base_dir): for directory in dirs: if regex.search(directory): matching_dirs.append(os.path.join(root, directory)) @@ -2062,7 +2056,7 @@ def find_node_dataset_directories(base_dir: str): "summary": summary, } - with open(mmlu_branch_output, "w") as f: + with open(mmlu_branch_output, "w", encoding="utf-8") as f: json.dump(mmlu_branch_data, f, indent=4) else: print("No MMLU tasks directories found, skipping MMLU_branch evaluation.") @@ -2113,6 +2107,8 @@ def find_node_dataset_directories(base_dir: str): try: usable_cpu_count = len(os.sched_getaffinity(0)) // 2 except AttributeError: + import multiprocessing + usable_cpu_count = multiprocessing.cpu_count() // 2 max_workers = usable_cpu_count @@ -2202,11 +2198,11 @@ def find_node_dataset_directories(base_dir: str): "summary": summary, } - with open(mt_bench_branch_output, "w") as 
f: + with open(mt_bench_branch_output, "w", encoding="utf-8") as f: json.dump(mt_bench_branch_data, f, indent=4) """ exec_run_final_eval_op_args = f""" -run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_BRANCH_SCORES_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy_path="{TAXONOMY_PATH}", sdg_path="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", device=None, base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, model_dtype="{MODEL_DTYPE}", few_shots={FEW_SHOTS}, batch_size="{BATCH_SIZE}") +run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_BRANCH_SCORES_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy_path="{TAXONOMY_PATH}", sdg_path="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, few_shots={FEW_SHOTS}, batch_size="{BATCH_SIZE}") """ eval_container = kubernetes.client.V1Container(
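
Both `shutdown_vllm` helpers in the patch lose their `except subprocess.NoSuchProcess` branch: the standard-library `subprocess` module defines no exception by that name (it belongs to `psutil`), so evaluating that handler while an exception was being matched would itself raise `AttributeError`. What the patched code relies on is a plain terminate-then-kill sequence. A minimal, self-contained sketch of that pattern, with a placeholder child process standing in for the real vLLM server:

import subprocess


def shutdown_server(process: subprocess.Popen, timeout: int = 20) -> None:
    # Ask the child to exit (SIGTERM on POSIX), then force-kill if it does not comply in time.
    process.terminate()
    try:
        process.wait(timeout=timeout)
        print(f"Process with PID {process.pid} terminated gracefully.")
    except subprocess.TimeoutExpired:
        print(f"Timeout expired. Forcefully killing process with PID: {process.pid}")
        process.kill()  # SIGKILL; cannot be ignored by the child


if __name__ == "__main__":
    # POSIX 'sleep' used purely as a stand-in for the vLLM server process.
    child = subprocess.Popen(["sleep", "60"])
    shutdown_server(child, timeout=5)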
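
The `max_workers == "auto"` branches in both evaluation ops now import `multiprocessing` locally before calling `multiprocessing.cpu_count()`, since the module was otherwise never imported in the component body; the Linux-only `os.sched_getaffinity(0)` path is preferred because it respects container CPU limits. A standalone sketch of that resolution logic (the wrapper function name here is illustrative, not part of the patch):

import os


def resolve_max_workers(max_workers):
    # Resolve the 'auto' sentinel to half of the CPUs usable by this process.
    if max_workers != "auto":
        return int(max_workers)
    try:
        # Respects cgroup/affinity limits, but only exists on Linux.
        usable_cpu_count = len(os.sched_getaffinity(0)) // 2
    except AttributeError:
        import multiprocessing

        # Portable fallback: total CPU count, ignoring affinity restrictions.
        usable_cpu_count = multiprocessing.cpu_count() // 2
    return usable_cpu_count


if __name__ == "__main__":
    print(resolve_max_workers("auto"))
    print(resolve_max_workers("8"))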
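
The readiness probe in the final-eval `launch_vllm` gains a `timeout=10` on `requests.get`, so a hung connection can no longer stall a retry slot indefinitely; only `requests.ConnectionError` is swallowed, and the loop raises `RuntimeError` once the retries are exhausted. A trimmed sketch of that polling loop (the endpoint address below is illustrative):

import time

import requests


def wait_for_server(server_url, retries=120, delay=10):
    # Poll the OpenAI-compatible /models endpoint until it answers 200.
    for attempt in range(retries):
        try:
            # Bounded per-request timeout so one hung connection cannot block the loop.
            response = requests.get(f"{server_url}/models", timeout=10)
            if response.status_code == 200:
                print(f"Server is up and running at {server_url}.")
                return
        except requests.ConnectionError:
            pass
        print(
            f"Server not available yet, retrying in {delay} seconds "
            f"(Attempt {attempt + 1}/{retries})..."
        )
        time.sleep(delay)
    raise RuntimeError(f"Failed to reach server at {server_url} after {retries} retries.")


# Example usage (illustrative local address):
# wait_for_server("http://127.0.0.1:8000/v1", retries=3, delay=1)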