From 2dd95681b277707630731d8e468d1e72235b3576 Mon Sep 17 00:00:00 2001
From: Rui Campos
Date: Wed, 3 Jan 2024 17:50:26 +0000
Subject: [PATCH] fixes

---
 .github/workflows/gpt_shakespear_experiment.yml |  8 +-------
 gpt_shakespear/train_worker.py                  |  3 ++-
 gpt_shakespear/user_data.sh                     | 16 ++++++----------
 mtn_shakespeare/model/self_attention.py         | 13 ++++++++++---
 4 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/gpt_shakespear_experiment.yml b/.github/workflows/gpt_shakespear_experiment.yml
index a0ae11ff..e8953cc1 100644
--- a/.github/workflows/gpt_shakespear_experiment.yml
+++ b/.github/workflows/gpt_shakespear_experiment.yml
@@ -24,16 +24,11 @@ on:
         required: true
         type: string
         default: '11'
-      tokens:
-        description: 'The number of tokens in the vocab.'
-        required: true
-        type: string
-        default: '3'
       bias:
         description: 'Whether to use bias in the linear layers.'
         required: true
         type: string
-        default: 'True'
+        default: 'False'
       batch_size:
         description: 'The batch size.'
         required: true
@@ -90,7 +85,6 @@ jobs:
 
           export INSTANCE_TYPE=${{ github.event.inputs.instance_type }}
           export COORDINATES=${{ github.event.inputs.coordinates }}
-          export TOKENS=${{ github.event.inputs.tokens }}
           export WORDS=${{ github.event.inputs.words }}
           export NUMBER_OF_BLOCKS=${{ github.event.inputs.number_of_blocks }}
           export NUMBER_OF_HEADS=${{ github.event.inputs.number_of_heads }}
diff --git a/gpt_shakespear/train_worker.py b/gpt_shakespear/train_worker.py
index 4a83bcb4..96ddf849 100644
--- a/gpt_shakespear/train_worker.py
+++ b/gpt_shakespear/train_worker.py
@@ -15,6 +15,7 @@
 from typing import Literal, Iterator
 import tiktoken
 import numpy as np
+from typing import Optional
 
 
 
@@ -164,7 +165,7 @@ def generate_epoch() -> Iterator[tuple[torch.Tensor, torch.Tensor]]:
         raise ValueError(f"Unknown loss function {train_params.loss_function}")
 
     # ----------------- LOAD MODEL ----------------- #
-    last_epoch: float | None = mlflow.get_run(run.info.run_id).data.metrics.get('epoch', None)
+    last_epoch: Optional[float] = mlflow.get_run(run.info.run_id).data.metrics.get('epoch', None)
     if last_epoch is not None:
         last_epoch = int(last_epoch)
     logger.debug("Last epoch is %s", last_epoch)
diff --git a/gpt_shakespear/user_data.sh b/gpt_shakespear/user_data.sh
index 949a5dde..5627bef6 100644
--- a/gpt_shakespear/user_data.sh
+++ b/gpt_shakespear/user_data.sh
@@ -3,8 +3,6 @@
 export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID}
 export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY}
 
-sudo yum install amazon-cloudwatch-agent -y
-
 
 
 cat << 'EOF' > send_logs_to_cloudwatch.sh
@@ -55,13 +53,13 @@
 export MLFLOW_TRACKING_PASSWORD={MLFLOW_TRACKING_PASSWORD}
 
 sudo mkdir /larger_tmp
 export TMPDIR=/larger_tmp
 
-sudo fallocate -l 30G /swapfile
-sudo chmod 600 /swapfile
-sudo mkswap /swapfile1
-sudo swapon /swapfile
+# sudo fallocate -l 30G /swapfile
+# sudo chmod 600 /swapfile
+# sudo mkswap /swapfile
+# sudo swapon /swapfile
 
 sudo yum update -y
-sudo yum install -y git 
+sudo yum install -y git
 sudo yum install -y python
 sudo yum install -y python3-pip
@@ -69,13 +67,11 @@
 git clone https://github.com/Digital-Defiance/llm-voice-chat.git
 cd llm-voice-chat
 git checkout {current_commit}
 
-
-
 python -m venv env
 source env/bin/activate
 pip install -r .devcontainer/requirements.txt
 cd gpt_shakespear
 python train_worker.py
 #wait two minutes before shutting down, so that the logs can be sent to cloudwatch
-shutdown -h +2
+shutdown -h +1
diff --git a/mtn_shakespeare/model/self_attention.py b/mtn_shakespeare/model/self_attention.py
index a66cd117..53aeaeff 100644
--- a/mtn_shakespeare/model/self_attention.py
+++ b/mtn_shakespeare/model/self_attention.py
@@ -13,6 +13,11 @@ class SelfAttentionParameters(Protocol):
     words: int
     number_of_heads: int
 
+
+
+
+
+
 class SelfAttention(nn.Module):
     attention_heads_dc: nn.Linear
     projection_cc: nn.Linear
@@ -28,9 +33,11 @@ def __init__(self, params: SelfAttentionParameters):
         self.COORDINATES = params.coordinates
         self.NUMBER_OF_HEADS = params.number_of_heads
 
-        # d = 3*coordinates
-        dimension = 3 * params.coordinates
-
+
+        dimension = 2 * params.coordinates
+
+
+
         self.attention_heads_dc = nn.Linear(
             params.coordinates,
             dimension,
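
Note on the train_worker.py hunk: the "float | None" union spelling (PEP 604) is only
valid on Python 3.10+ wherever the annotation is actually evaluated, while
typing.Optional works back to Python 3.7, which is presumably why the patch swaps it
in. A minimal standalone sketch of the difference (illustrative only, not the worker
code itself):

    from typing import Optional

    # Works on Python 3.7+: Optional[float] means Union[float, None].
    def get_last_epoch() -> Optional[float]:
        return None

    # Equivalent, but Python 3.10+ only, because the signature annotation is
    # evaluated when the function is defined:
    # def get_last_epoch() -> float | None:
    #     return None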
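
Note on the self_attention.py hunk: the removed 3 * params.coordinates is the width of
a conventional fused query/key/value projection, while the new 2 * params.coordinates
can only be carved into two tensors, so the module's forward pass (not shown in this
patch) must consume the projection differently. For reference, a sketch of the standard
fused-QKV layout that the 3x factor corresponds to; the class and variable names below
are illustrative assumptions, not this repository's code:

    import torch
    from torch import nn

    class FusedQKVSelfAttention(nn.Module):
        # One linear layer maps coordinates -> 3 * coordinates, and its output
        # is split into equal query, key and value tensors.
        def __init__(self, coordinates: int, number_of_heads: int, bias: bool = False):
            super().__init__()
            assert coordinates % number_of_heads == 0
            self.number_of_heads = number_of_heads
            self.qkv = nn.Linear(coordinates, 3 * coordinates, bias=bias)
            self.projection = nn.Linear(coordinates, coordinates, bias=bias)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            b, t, c = x.shape
            q, k, v = self.qkv(x).split(c, dim=-1)
            # (batch, tokens, coordinates) -> (batch, heads, tokens, head_dim)
            q, k, v = (
                z.view(b, t, self.number_of_heads, c // self.number_of_heads).transpose(1, 2)
                for z in (q, k, v)
            )
            y = nn.functional.scaled_dot_product_attention(q, k, v, is_causal=True)
            y = y.transpose(1, 2).contiguous().view(b, t, c)
            return self.projection(y)

Under the patched 2x width, the analogous call would split attention_heads_dc's output
into two halves; what those halves represent is defined by the rest of the module.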