From d84e321aa880e4411091301c49b131010bd46afe Mon Sep 17 00:00:00 2001
From: AlArgente
Date: Mon, 22 Jan 2024 07:54:30 +0100
Subject: [PATCH] Change the GRU to a BiGRU in the SA Task. Also now we use the
 IMDb dataset, as we were using the AG_NEWS dataset by mistake.

---
 .github/workflows/pytest.yml                  | 35 ++++++++++++++
 .github/workflows/trunk.yml                   | 27 +++++++++++
 .github/workflows/update-coverage.yml         | 47 +++++++++++++++++++
 ...ed IMDb PT using FLExible with a GRU.ipynb | 29 +++++++-----
 4 files changed, 126 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/pytest.yml
 create mode 100644 .github/workflows/trunk.yml
 create mode 100644 .github/workflows/update-coverage.yml

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
new file mode 100644
index 0000000..2fdec6a
--- /dev/null
+++ b/.github/workflows/pytest.yml
@@ -0,0 +1,35 @@
+# This workflow will install dependencies, create coverage tests and run Pytest Coverage Comment
+# For more information see: https://github.com/MishaKav/pytest-coverage-comment/
+name: Tests
+on:
+  pull_request:
+    branches:
+      - main
+    types: [synchronize, opened]
+jobs:
+  build:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python 3.8.10
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.8.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install flake8 pytest pytest-cov pytest-xdist
+          pip install ".[develop]"
+
+      - name: Build coverage file
+        run: |
+          pytest -n 2 --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=flex tests/ | tee pytest-coverage.txt
+
+      - name: Pytest coverage comment
+        uses: MishaKav/pytest-coverage-comment@main
+        with:
+          report-only-changed-files: false
+          pytest-coverage-path: ./pytest-coverage.txt
+          junitxml-path: ./pytest.xml
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
new file mode 100644
index 0000000..0a7de92
--- /dev/null
+++ b/.github/workflows/trunk.yml
@@ -0,0 +1,27 @@
+# Runs the Trunk linter (trunk-io/trunk-action) on pull requests targeting main
+
+name: Linter
+
+on:
+  pull_request:
+    branches:
+      - 'main'
+    types: [synchronize, opened]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  # This workflow contains a single job called "build"
+  build:
+    # The type of runner that the job will run on
+    runs-on: ubuntu-latest
+
+    # Steps represent a sequence of tasks that will be executed as part of the job
+    steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - uses: actions/checkout@v3
+
+      - name: Trunk Check
+        uses: trunk-io/trunk-action@v1.0.3
diff --git a/.github/workflows/update-coverage.yml b/.github/workflows/update-coverage.yml
new file mode 100644
index 0000000..e6e23be
--- /dev/null
+++ b/.github/workflows/update-coverage.yml
@@ -0,0 +1,47 @@
+# On pushes to main: runs the tests with coverage and writes the coverage table into README.md
+# For more information see: https://github.com/MishaKav/pytest-coverage-comment/
+name: Update README
+on:
+  push:
+    branches: [ 'main' ]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.9'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install flake8 pytest pytest-cov
+          pip install .
+
+      - name: Build coverage file
+        run: |
+          pytest --cov-report=term-missing:skip-covered --cov=flex tests/ | tee pytest-coverage.txt
+
+      - name: Pytest coverage comment
+        if: ${{ github.ref == 'refs/heads/main' }}
+        id: coverageComment
+        uses: MishaKav/pytest-coverage-comment@main
+        with:
+          hide-comment: true
+          pytest-coverage-path: ./pytest-coverage.txt
+
+      - name: Update Readme with Coverage Html
+        if: ${{ github.ref == 'refs/heads/main' }}
+        run: |
+          sed -i '/<!-- Pytest Coverage Comment:Begin -->/,/<!-- Pytest Coverage Comment:End -->/c\<!-- Pytest Coverage Comment:Begin -->\n\${{ steps.coverageComment.outputs.coverageHtml }}\n<!-- Pytest Coverage Comment:End -->' ./README.md
+
+      - name: Commit & Push changes to Readme
+        if: ${{ github.ref == 'refs/heads/main' }}
+        uses: actions-js/push@master
+        with:
+          message: Update coverage on Readme
+          github_token: ${{ secrets.CI_GITHUB_TOKEN }}
diff --git a/flexnlp/notebooks/Federated IMDb PT using FLExible with a GRU.ipynb b/flexnlp/notebooks/Federated IMDb PT using FLExible with a GRU.ipynb
index 1149c97..8879f6e 100644
--- a/flexnlp/notebooks/Federated IMDb PT using FLExible with a GRU.ipynb
+++ b/flexnlp/notebooks/Federated IMDb PT using FLExible with a GRU.ipynb
@@ -53,7 +53,7 @@
    "outputs": [],
    "source": [
     "# imdb_dataset = load_dataset('imdb', split=['train', 'test']) # Get the dataset from huggingface library\n",
-    "train_dataset, test_dataset = torchtext.datasets.AG_NEWS() # Get the dataset from torchtext library\n",
+    "train_dataset, test_dataset = torchtext.datasets.IMDB() # Get the dataset from torchtext library\n",
     "unique_classes = set([label for (label, text) in train_dataset])\n",
     "num_classes = len(unique_classes)"
    ]
@@ -284,20 +284,25 @@
     "        )\n",
     "        # Take the embeddings size from the embeddings vector.\n",
     "        self.embedding_size = embeddings.shape[1]\n",
-    "        #Create the GRU layer with just one layer.\n",
+    "        # Create the GRU layer with just one layer.\n",
     "        self.gru = nn.GRU(self.embedding_size,\n",
     "                          hidden_size,\n",
     "                          batch_first=True,\n",
-    "                          num_layers=1\n",
+    "                          num_layers=1,\n",
+    "                          bidirectional=True,\n",
+    "                          dropout=0.5\n",
     "                          )\n",
+    "        # Create a dropout layer for the BiGRU\n",
+    "        self.dropout = nn.Dropout(0.2)\n",
     "        # Create the prediction layer.\n",
-    "        self.fc = nn.Linear(hidden_size, num_classes)\n",
+    "        self.fc = nn.Linear(hidden_size*2, num_classes)\n",
     "\n",
     "    def forward(self, x):\n",
     "        # x.shape = [batch_size, len]\n",
     "        x = self.emb(x)\n",
     "        # x.shape = [batch_size, len, emb_dim]\n",
     "        _, x = self.gru(x)\n",
-    "        # x.shape = [1, batch_size, hid_dim]\n",
+    "        x = self.dropout(torch.cat((x[-2,:,:], x[-1,:,:]), dim=1))\n",
+    "        # x.shape = [batch_size, hid_dim * 2]\n",
     "        x = self.fc(x)\n",
     "        return x\n",
@@ -452,7 +457,7 @@
     "    #  batch_sampler=batch_sampler_v2(BATCH_SIZE, train_indices))\n",
     "    model = client_flex_model[\"model\"]\n",
     "    # lr = 0.001\n",
-    "    optimizer = client_flex_model['optimizer_func'](model.parameters(), lr=0.1, **client_flex_model[\"optimizer_kwargs\"])\n",
+    "    optimizer = client_flex_model['optimizer_func'](model.parameters(), lr=0.01, **client_flex_model[\"optimizer_kwargs\"])\n",
     "    model = model.train()\n",
     "    model = model.to(device)\n",
     "    criterion = client_flex_model[\"criterion\"]\n",
@@ -657,12 +662,10 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# train_n_rounds(5)"
+    "Run the following cell to train the model for multiple rounds."
    ]
   },
   {
@@ -670,7 +673,9 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "train_n_rounds(n_rounds=5)"
+   ]
   }
  ],
  "metadata": {
@@ -689,7 +694,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.11.4"
   },
   "orig_nbformat": 4
  },