Skip to content

Commit

Permalink
fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
RuiFilipeCampos committed Jan 3, 2024
1 parent b13a51e commit 2dd9568
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 21 deletions.
8 changes: 1 addition & 7 deletions .github/workflows/gpt_shakespear_experiment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,11 @@ on:
required: true
type: string
default: '11'
tokens:
description: 'The number of tokens in the vocab.'
required: true
type: string
default: '3'
bias:
description: 'Whether to use bias in the linear layers.'
required: true
type: string
default: 'True'
default: 'False'
batch_size:
description: 'The batch size.'
required: true
Expand Down Expand Up @@ -90,7 +85,6 @@ jobs:
export INSTANCE_TYPE=${{ github.event.inputs.instance_type }}
export COORDINATES=${{ github.event.inputs.coordinates }}
export TOKENS=${{ github.event.inputs.tokens }}
export WORDS=${{ github.event.inputs.words }}
export NUMBER_OF_BLOCKS=${{ github.event.inputs.number_of_blocks }}
export NUMBER_OF_HEADS=${{ github.event.inputs.number_of_heads }}
Expand Down
3 changes: 2 additions & 1 deletion gpt_shakespear/train_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from typing import Literal, Iterator
import tiktoken
import numpy as np
from typing import Optional



Expand Down Expand Up @@ -164,7 +165,7 @@ def generate_epoch() -> Iterator[tuple[torch.Tensor, torch.Tensor]]:
raise ValueError(f"Unknown loss function {train_params.loss_function}")

# ----------------- LOAD MODEL ----------------- #
last_epoch: float | None = mlflow.get_run(run.info.run_id).data.metrics.get('epoch', None)
last_epoch: Optional[float] = mlflow.get_run(run.info.run_id).data.metrics.get('epoch', None)
if last_epoch is not None:
last_epoch = int(last_epoch)
logger.debug("Last epoch is %s", last_epoch)
Expand Down
16 changes: 6 additions & 10 deletions gpt_shakespear/user_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY}
sudo yum install amazon-cloudwatch-agent -y



cat << 'EOF' > send_logs_to_cloudwatch.sh
Expand Down Expand Up @@ -55,27 +53,25 @@ export MLFLOW_TRACKING_PASSWORD={MLFLOW_TRACKING_PASSWORD}
sudo mkdir /larger_tmp
export TMPDIR=/larger_tmp

sudo fallocate -l 30G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile1
sudo swapon /swapfile
# sudo fallocate -l 30G /swapfile
# sudo chmod 600 /swapfile
# sudo mkswap /swapfile
# sudo swapon /swapfile

sudo yum update -y
sudo yum install -y git
sudo yum install -y git
sudo yum install -y python
sudo yum install -y python3-pip

git clone https://github.com/Digital-Defiance/llm-voice-chat.git
cd llm-voice-chat
git checkout {current_commit}



python -m venv env
source env/bin/activate
pip install -r .devcontainer/requirements.txt
cd gpt_shakespear
python train_worker.py
# Delay the shutdown (see the +N minutes argument below) so that the logs can be sent to CloudWatch first.
shutdown -h +2
shutdown -h +1

13 changes: 10 additions & 3 deletions mtn_shakespeare/model/self_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ class SelfAttentionParameters(Protocol):
words: int
number_of_heads: int






class SelfAttention(nn.Module):
attention_heads_dc: nn.Linear
projection_cc: nn.Linear
Expand All @@ -28,9 +33,11 @@ def __init__(self, params: SelfAttentionParameters):

self.COORDINATES = params.coordinates
self.NUMBER_OF_HEADS = params.number_of_heads
# d = 3*coordinates
dimension = 3 * params.coordinates


dimension = 2 * params.coordinates



self.attention_heads_dc = nn.Linear(
params.coordinates,
dimension,
Expand Down

0 comments on commit 2dd9568

Please sign in to comment.