From c237042413d6b801057d78babd983560f9e641fe Mon Sep 17 00:00:00 2001 From: sageof6path Date: Fri, 13 Dec 2024 10:00:23 +0530 Subject: [PATCH 1/3] Updated util files to fix peft docker --- .../model_oss/util/commons.py | 57 +++++- .../model_oss/util/constants.py | 84 ++++++-- .../model_oss/util/fileutils.py | 179 ++++++++++++++---- .../model_oss/util/hypertune_utils.py | 8 + 4 files changed, 268 insertions(+), 60 deletions(-) diff --git a/community-content/vertex_model_garden/model_oss/util/commons.py b/community-content/vertex_model_garden/model_oss/util/commons.py index 69f106f64..b9a60ad08 100644 --- a/community-content/vertex_model_garden/model_oss/util/commons.py +++ b/community-content/vertex_model_garden/model_oss/util/commons.py @@ -1,13 +1,35 @@ """Common utility lib for prediction on images.""" -from typing import Any, Dict, List +from typing import Any, Dict, List, Tuple import numpy as np from PIL import Image import tensorflow as tf import yaml -from util import image_format_converter +from google3.cloud.ml.applications.vision.model_garden.model_oss.util import image_format_converter + + +def convert_list_to_label_map( + input_list: List[str], +) -> Tuple[Dict[str, Dict[int, str]], List[int]]: + """Converts a list of labels to a dictionary and numerical encoding. + + Args: + input_list: A list of strings representing class labels. + + Returns: + A tuple containing: + label_map: A dictionary mapping unique labels to integer indices. + encoded_list: A list of integers corresponding to the labels in the input + list. + """ + unique_labels = set(input_list) + label_map_reverse = {label: idx for idx, label in enumerate(unique_labels)} + label_map = {idx: label for idx, label in enumerate(unique_labels)} + encoded_list = [label_map_reverse[label] for label in input_list] + + return {"label_map": label_map}, encoded_list def get_prediction_instances(image: Image.Image) -> List[Dict[str, Any]]: @@ -40,14 +62,14 @@ def get_label_map(label_map_yaml_filepath: str) -> Dict[str, Any]: def get_object_detection_endpoint_predictions( - detection_endpoint: ..., + detector_endpoint: ..., input_image: np.ndarray, detection_thresh: float = 0.2, ) -> np.ndarray: """Gets endpoint predictions. Args: - detection_endpoint: image object detection endpoint. + detector_endpoint: image object detection endpoint. input_image: Input image. detection_thresh: Detection threshold. @@ -55,9 +77,10 @@ def get_object_detection_endpoint_predictions( Object detection predictions from endpoints. """ height, width, _ = input_image.shape - predictions = detection_endpoint.predict( + predictions = detector_endpoint.predict( get_prediction_instances(Image.fromarray(input_image)) ).predictions + detection_scores = np.array(predictions[0]["detection_scores"]) detection_classes = np.array(predictions[0]["detection_classes"]) detection_boxes = np.array( @@ -66,6 +89,29 @@ def get_object_detection_endpoint_predictions( for b in predictions[0]["detection_boxes"] ] ) + return merge_boxes_and_classes( + detection_scores, detection_boxes, detection_classes, detection_thresh + ) + + +def merge_boxes_and_classes( + detection_scores: np.ndarray, + detection_boxes: np.ndarray, + detection_classes: np.ndarray, + detection_thresh: float = 0.2, +) -> np.ndarray: + """Merges prediction boxes and classes. + + Args: + detection_scores: array of detection scores. + detection_boxes: array of detection boxes. + detection_classes: array of detection classes. + detection_thresh: float indicating the detection threshold. + + Returns: + preds_merge_cls: a numpy array containing the detection boxes, scores and + classes. + """ thresh_indices = [ x for x, val in enumerate(detection_scores) if val > detection_thresh ] @@ -76,4 +122,5 @@ def get_object_detection_endpoint_predictions( preds_merge_cls = np.column_stack( (preds_merge_conf, detection_classes[thresh_indices]) ) + return preds_merge_cls diff --git a/community-content/vertex_model_garden/model_oss/util/constants.py b/community-content/vertex_model_garden/model_oss/util/constants.py index 477e40622..4454c0016 100644 --- a/community-content/vertex_model_garden/model_oss/util/constants.py +++ b/community-content/vertex_model_garden/model_oss/util/constants.py @@ -36,6 +36,12 @@ # Reported hyperparameter tuning metric tag. HP_METRIC_TAG = 'model_performance' +HP_LOSS_TAG = 'model_loss' + +# Reported places. +REPORT_TO_NONE = 'none' +REPORT_TO_WANDB = 'wandb' +REPORT_TO_TENSORBOARD = 'tensorboard' # HPT trial prefix. TRIAL_PREFIX = 'trial_' @@ -45,7 +51,7 @@ ML_USE_VALIDATION = 'validation' ML_USE_TEST = 'test' -# COCO json keys +# COCO json keys. COCO_JSON_ANNOTATIONS = 'annotations' COCO_JSON_ANNOTATION_IMAGE_ID = 'image_id' COCO_JSON_ANNOTATION_CATEGORY_ID = 'category_id' @@ -60,36 +66,88 @@ COCO_JSON_IMAGE_COCO_URL = 'coco_url' COCO_ANNOTATION_BBOX = 'bbox' -# GCS prefixes +# GCS prefixes. GCS_URI_PREFIX = 'gs://' GCSFUSE_URI_PREFIX = '/gcs/' LOCAL_EVALUATION_RESULT_DIR = '/tmp/evaluation_result_dir' LOCAL_MODEL_DIR = '/tmp/model_dir' +LOCAL_LORA_DIR = '/tmp/lora_dir' LOCAL_BASE_MODEL_DIR = '/tmp/base_model_dir' LOCAL_DATA_DIR = '/tmp/data' +LOCAL_OUTPUT_DIR = '/tmp/output_dir' +LOCAL_PREDICTION_RESULT_DIR = '/tmp/prediction_result_dir' +SHARED_MEM_DIR = '/dev/shm' # Huggingface files. HF_MODEL_WEIGHTS_SUFFIX = '.bin' # PEFT finetuning constants. +TEXT_TO_IMAGE = 'text-to-image' TEXT_TO_IMAGE_LORA = 'text-to-image-lora' +TEXT_TO_IMAGE_DREAMBOOTH = 'text-to-image-dreambooth' +TEXT_TO_IMAGE_DREAMBOOTH_LORA = 'text-to-image-dreambooth-lora' +TEXT_TO_IMAGE_DREAMBOOTH_LORA_SDXL = 'text-to-image-dreambooth-lora-sdxl' SEQUENCE_CLASSIFICATION_LORA = 'sequence-classification-lora' -CAUSAL_LANGUAGE_MODELING_LORA = 'causal-language-modeling-lora' +MERGE_CAUSAL_LANGUAGE_MODEL_LORA = 'merge-causal-language-model-lora' +QUANTIZE_MODEL = 'quantize-model' INSTRUCT_LORA = 'instruct-lora' -CAUSAL_LANGUAGE_MODELING_LORA_TARGET_MODULES = [ - "q_proj", - "v_proj", -] -INSTRUCT_LORA_TARGET_MODULES = [ - "query_key_value", - "dense", - "dense_h_to_4h", - "dense_4h_to_h", -] +VALIDATE_DATASET_WITH_TEMPLATE = 'validate-dataset-with-template' +DEFAULT_TEXT_COLUMN_IN_DATASET = 'quote' +DEFAULT_TEXT_COLUMN_IN_QUANTIZATION_DATASET = 'text' +DEFAULT_INSTRUCT_COLUMN_IN_DATASET = 'text' + +FINAL_CHECKPOINT_DIRNAME = 'checkpoint-final' + +# ImageBind inference constants. +FEATURE_EMBEDDING_GENERATION = 'feature-embedding-generation' +ZERO_SHOT_CLASSIFICATION = 'zero-shot-classification' # Precision modes for loading model weights. +PRECISION_MODE_2 = '2bit' +PRECISION_MODE_3 = '3bit' PRECISION_MODE_4 = '4bit' PRECISION_MODE_8 = '8bit' +PRECISION_MODE_FP8 = 'float8' # to use fbgemm_fp8 quantization PRECISION_MODE_16 = 'float16' +PRECISION_MODE_16B = 'bfloat16' PRECISION_MODE_32 = 'float32' + +# Quantization modes. +GPTQ = 'gptq' +AWQ = 'awq' + +# AWQ versions. +GEMM = 'GEMM' +GEMV = 'GEMV' + +# Environment variable keys. +PRIVATE_BUCKET_ENV_KEY = 'AIP_PRIVATE_BUCKET_NAME' + +# Kfp pipeline constants. +TFVISION_TRAIN_OUTPUT_ARTIFACT_NAME = 'checkpoint_dir' + +# Vertex IOD type. +AUTOML = 'AUTOML' +MODEL_GARDEN = 'MODEL_GARDEN' + +# LRU Disk Cache constants. +MD5_HASHMAP_FILENAME = 'md5_hashmap.json' + +# Prediction request keys. +PREDICT_INSTANCE_KEY = 'instances' +PREDICT_INSTANCE_IMAGE_KEY = 'image' +PREDICT_INSTANCE_POSE_IMAGE_KEY = 'pose_image' +PREDICT_INSTANCE_TEXT_KEY = 'text' +PREDICT_INSTANCE_PROMPT_KEY = 'prompt' + +PREDICT_PARAMETERS_KEY = 'parameters' +PREDICT_PARAMETERS_NUM_INFERENCE_STEPS_KEY = 'num_inference_steps' +PREDICT_PARAMETERS_HEIGHT_KEY = 'height' +PREDICT_PARAMETERS_WIDTH_KEY = 'width' +PREDICT_PARAMETERS_GUIDANCE_SCALE_KEY = 'guidance_scale' +PREDICT_PARAMETERS_NEGATIVE_PROMPT_KEY = 'negative_prompt' +PREDICT_PARAMETERS_LORA_ID_KEY = 'lora_id' +PREDICT_PARAMETERS_IGNORE_LORA_CACHE_KEY = 'ignore_lora_cache' + +PREDICT_OUTPUT_KEY = 'output' diff --git a/community-content/vertex_model_garden/model_oss/util/fileutils.py b/community-content/vertex_model_garden/model_oss/util/fileutils.py index bc9e390f2..831d9d414 100644 --- a/community-content/vertex_model_garden/model_oss/util/fileutils.py +++ b/community-content/vertex_model_garden/model_oss/util/fileutils.py @@ -1,16 +1,27 @@ """Fileutil lib to copy files between gcs and local.""" -import glob +import fnmatch import os import pathlib import shutil -from typing import Tuple +from typing import List, Optional, Tuple import uuid from absl import logging from google.cloud import storage -from util import constants +from google3.cloud.ml.applications.vision.model_garden.model_oss.util import constants + + +_GCS_CLIENT = None + + +def _get_gcs_client() -> storage.Client: + """Gets the default GCS client.""" + global _GCS_CLIENT + if _GCS_CLIENT is None: + _GCS_CLIENT = storage.Client() + return _GCS_CLIENT def generate_tmp_path(extension: str = '') -> str: @@ -36,6 +47,16 @@ def force_gcs_fuse_path(gcs_uri: str) -> str: return gcs_uri +def force_gcs_path(uri: str) -> str: + """Converts /gcs/ uris to their gs:// equivalents. No-op for other uris.""" + if uri.startswith(constants.GCSFUSE_URI_PREFIX): + return uri.replace( + constants.GCSFUSE_URI_PREFIX, constants.GCS_URI_PREFIX, 1 + ) + else: + return uri + + def download_gcs_file_to_local_dir(gcs_uri: str, local_dir: str): """Download a gcs file to a local dir. @@ -62,15 +83,47 @@ def download_gcs_file_to_local(gcs_uri: str, local_path: str): raise ValueError( f'{gcs_uri} is not a GCS path starting with {constants.GCS_URI_PREFIX}.' ) - client = storage.Client() + client = _get_gcs_client() os.makedirs(os.path.dirname(local_path), exist_ok=True) with open(local_path, 'wb') as f: client.download_blob_to_file(gcs_uri, f) +def download_gcs_file_list_to_local( + gcs_uri_list: List[str], local_dir: str +) -> List[str]: + """Downloads a list of GCS files to a local directory. + + Args: + gcs_uri_list: A list of GCS file paths. + local_dir: Local directory in which the GCS files are saved. + + Returns: + The local file paths corresponding to the input GCS file paths. + + Raises: + ValueError: An input file path is not a GCS path. + """ + local_paths = [] + for gcs_uri in gcs_uri_list: + if not is_gcs_path(gcs_uri): + raise ValueError( + f'{gcs_uri} is not a GCS path starting with' + f' {constants.GCS_URI_PREFIX}.' + ) + local_path = os.path.join(local_dir, gcs_uri.replace('gs://', '')) + download_gcs_file_to_local(gcs_uri, local_path) + local_paths.append(local_path) + return local_paths + + def download_gcs_dir_to_local( - gcs_dir: str, local_dir: str, skip_hf_model_bin: bool = False -): + gcs_dir: str, + local_dir: str, + skip_hf_model_bin: bool = False, + allow_patterns: Optional[List[str]] = None, + log: bool = True, +) -> None: """Downloads files in a GCS directory to a local directory. For example: @@ -78,16 +131,21 @@ def download_gcs_dir_to_local( gs://bucket/foo/a -> /tmp/bar/a gs://bucket/foo/b/c -> /tmp/bar/b/c - Arguments: + Args: gcs_dir: A string of directory path on GCS. local_dir: A string of local directory path. skip_hf_model_bin: True to skip downloading HF model bin files. + allow_patterns: A list of allowed patterns. If provided, only files matching + one or more patterns are downloaded. + log: True to log each downloaded file. """ if not is_gcs_path(gcs_dir): raise ValueError(f'{gcs_dir} is not a GCS path starting with gs://.') bucket_name = gcs_dir.split('/')[2] - prefix = gcs_dir[len(constants.GCS_URI_PREFIX + bucket_name) :].strip('/') - client = storage.Client() + prefix = ( + gcs_dir[len(constants.GCS_URI_PREFIX + bucket_name) :].strip('/') + '/' + ) + client = _get_gcs_client() blobs = client.list_blobs(bucket_name, prefix=prefix) for blob in blobs: if blob.name[-1] == '/': @@ -95,43 +153,63 @@ def download_gcs_dir_to_local( file_path = blob.name[len(prefix) :].strip('/') local_file_path = os.path.join(local_dir, file_path) os.makedirs(os.path.dirname(local_file_path), exist_ok=True) + if allow_patterns and all( + [not fnmatch.fnmatch(file_path, p) for p in allow_patterns] + ): + continue if ( file_path.endswith(constants.HF_MODEL_WEIGHTS_SUFFIX) and skip_hf_model_bin ): - logging.info('Skip downloading model bin %s', file_path) + if log: + logging.info('Skip downloading model bin %s', file_path) with open(local_file_path, 'w') as f: - f.write(f'{constants.GCS_URI_PREFIX}{bucket_name}/{prefix}/{file_path}') + f.write(f'{constants.GCS_URI_PREFIX}{bucket_name}/{prefix}{file_path}') else: - logging.info('Downloading %s to %s', file_path, local_file_path) + if log: + logging.info('Downloading %s to %s', file_path, local_file_path) blob.download_to_filename(local_file_path) +def _get_relative_paths(base_dir: str) -> List[str]: + """Gets relative paths of all files in a local base directory.""" + path = pathlib.Path(base_dir) + relative_paths = [] + for local_file in path.rglob('*'): + if os.path.isfile(local_file): + relative_path = os.path.relpath(local_file, base_dir) + relative_paths.append(relative_path) + return relative_paths + + +def _upload_local_files_to_gcs( + relative_paths: List[str], local_dir: str, gcs_dir: str +): + """Uploads local files to gcs.""" + bucket_name = gcs_dir.split('/')[2] + blob_dir = '/'.join(gcs_dir.split('/')[3:]) + client = _get_gcs_client() + bucket = client.bucket(bucket_name) + for relative_path in relative_paths: + blob = bucket.blob(os.path.join(blob_dir, relative_path)) + blob.upload_from_filename(os.path.join(local_dir, relative_path)) + + def upload_local_dir_to_gcs(local_dir: str, gcs_dir: str): """Uploads local dir to gcs. For example: upload_local_dir_to_gcs(/tmp/bar, gs://bucket/foo) - gs://bucket/foo/a -> /tmp/bar/a - gs://bucket/foo/b/c -> /tmp/bar/b/c + /tmp/bar/a -> gs://bucket/foo/a + /tmp/bar/b/c -> gs://bucket/foo/b/c Arguments: local_dir: A string of local directory path. gcs_dir: A string of directory path on GCS. """ - bucket_name = gcs_dir.split('/')[2] - blob_dir = '/'.join(gcs_dir.split('/')[3:]) - client = storage.Client() - bucket = client.bucket(bucket_name) - for local_file in glob.glob(local_dir + '/**'): - if os.path.isfile(local_file): - logging.info( - 'Uploading %s to %s', - local_file, - os.path.join(constants.GCS_URI_PREFIX, bucket_name, blob_dir), - ) - blob = bucket.blob(os.path.join(blob_dir, os.path.basename(local_file))) - blob.upload_from_filename(local_file) + # Relative paths of all files in local_dir. + relative_paths = _get_relative_paths(local_dir) + _upload_local_files_to_gcs(relative_paths, local_dir, gcs_dir) def upload_file_to_gcs_path( @@ -155,7 +233,7 @@ def upload_file_to_gcs_path( if not source_path_obj.exists(): raise RuntimeError(f'Source path does not exist: {source_path}') - storage_client = storage.Client() + storage_client = _get_gcs_client() source_file_path = source_path destination_file_uri = destination_uri logging.info('Uploading "%s" to "%s"', source_file_path, destination_file_uri) @@ -174,7 +252,9 @@ def is_gcs_path(input_path: str) -> bool: Returns: True if the input path is a GCS path, False otherwise. """ - return input_path.startswith(constants.GCS_URI_PREFIX) + return input_path is not None and input_path.startswith( + constants.GCS_URI_PREFIX + ) def release_text_assets( @@ -232,13 +312,10 @@ def download_video_from_gcs_to_local(video_file_path: str) -> Tuple[str, str]: """ _, local_video_file_name = os.path.split(video_file_path) file_extension = os.path.splitext(video_file_path)[1] - if file_extension: - remote_video_file_name = local_video_file_name.replace( - file_extension, '_overlay.mp4' - ) - else: - remote_video_file_name = local_video_file_name + '_overlay.mp4' - local_file_path = generate_tmp_path(file_extension) + remote_video_file_name = local_video_file_name.replace( + file_extension, '_overlay.mp4' + ) + local_file_path = generate_tmp_path(os.path.splitext(video_file_path)[1]) logging.info('Downloading %s to %s...', video_file_path, local_file_path) download_gcs_file_to_local(video_file_path, local_file_path) return local_file_path, remote_video_file_name @@ -254,10 +331,28 @@ def get_output_video_file(video_output_file_path: str) -> str: str: Local video output file path. """ file_extension = os.path.splitext(video_output_file_path)[1] - if file_extension: - out_local_video_file_name = video_output_file_path.replace( - file_extension, '_overlay' + file_extension - ) - else: - out_local_video_file_name = video_output_file_path + '_overlay' + out_local_video_file_name = video_output_file_path.replace( + file_extension, '_overlay' + file_extension + ) return out_local_video_file_name + + +def write_first_party_model_metadata( + output_path: str, required_container_uri: str +) -> None: + """Write Vertex internal model metadata for first party artifacts.""" + model_metadata_fname = 'model_metadata.jsonl' + if len(required_container_uri) > 126: + raise ValueError(f'Docker URI exceeds 126 chars: {required_container_uri}') + payload = '\n{}{}'.format( # serialized proto + chr(len(required_container_uri)), + required_container_uri, + ) + os.makedirs(output_path, exist_ok=True) + output_dirs = [output_path] + if output_path.startswith('/gcs'): + # include all parent dirs, except "/", "/gcs" + output_dirs.extend([str(p) for p in pathlib.Path(output_path).parents][:-2]) + for output_dir in output_dirs: + with open(os.path.join(output_dir, model_metadata_fname), 'w') as f: + f.write(payload) diff --git a/community-content/vertex_model_garden/model_oss/util/hypertune_utils.py b/community-content/vertex_model_garden/model_oss/util/hypertune_utils.py index 886ce161d..2198c9c82 100644 --- a/community-content/vertex_model_garden/model_oss/util/hypertune_utils.py +++ b/community-content/vertex_model_garden/model_oss/util/hypertune_utils.py @@ -20,3 +20,11 @@ def get_trial_id_from_environment() -> str: _ENVIRONMENT_VARIABLE_FOR_TRIAL_ID, ) return os.environ.get(_ENVIRONMENT_VARIABLE_FOR_TRIAL_ID, '0') + + +def maybe_append_trial_id(path: str) -> str: + """Appends trial_N to path if running in a Hyperparameter Tuning Job.""" + trial_id = os.environ.get(_ENVIRONMENT_VARIABLE_FOR_TRIAL_ID) + if trial_id is None: + return path + return os.path.join(path, f'trial_{trial_id}') From ba58936b8073efd717a39b9c94ecf8826521642b Mon Sep 17 00:00:00 2001 From: sageof6path <31839119+sageof6path@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:14:33 +0530 Subject: [PATCH 2/3] fix imports --- community-content/vertex_model_garden/model_oss/util/commons.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community-content/vertex_model_garden/model_oss/util/commons.py b/community-content/vertex_model_garden/model_oss/util/commons.py index b9a60ad08..40d4a17cd 100644 --- a/community-content/vertex_model_garden/model_oss/util/commons.py +++ b/community-content/vertex_model_garden/model_oss/util/commons.py @@ -7,7 +7,7 @@ import tensorflow as tf import yaml -from google3.cloud.ml.applications.vision.model_garden.model_oss.util import image_format_converter +from util import image_format_converter def convert_list_to_label_map( From 9a94a0d90a51fc9c039a14910732e83ce6f638f2 Mon Sep 17 00:00:00 2001 From: sageof6path <31839119+sageof6path@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:15:00 +0530 Subject: [PATCH 3/3] fix imports fileutils.py --- .../vertex_model_garden/model_oss/util/fileutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community-content/vertex_model_garden/model_oss/util/fileutils.py b/community-content/vertex_model_garden/model_oss/util/fileutils.py index 831d9d414..a3a5acd9b 100644 --- a/community-content/vertex_model_garden/model_oss/util/fileutils.py +++ b/community-content/vertex_model_garden/model_oss/util/fileutils.py @@ -10,7 +10,7 @@ from absl import logging from google.cloud import storage -from google3.cloud.ml.applications.vision.model_garden.model_oss.util import constants +from util import constants _GCS_CLIENT = None