
Ruff linter and contribution guide #69

Merged · 19 commits · Jul 19, 2023
38 changes: 9 additions & 29 deletions .github/workflows/codestyle.yml
@@ -1,45 +1,25 @@
name: codestyle
# <- standard block end ->
name: codestyle check
on:
push:
branches:
- main
pull_request:
branches:
- dev
- develop
- main


jobs:

build:
name: codestyle
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
max-parallel: 4
matrix:
os: [ubuntu-20.04]
python-version: [3.8]
timeout-minutes: 30
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- name: set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: install dependencies
python-version: "3.9"
- name: Install dependencies
run: |
# python -m pip install --upgrade --user pip
python -m pip install --upgrade pip
pip install -r requirements/requirements_dev.txt
python --version
pip --version
pip list
shell: bash
# <- standard block end ->
- name: check codestyle
run: |
catalyst-check-codestyle --line-length 89
ruff --config pyproject.toml --diff .
15 changes: 4 additions & 11 deletions .pre-commit-config.yaml
@@ -1,13 +1,6 @@
repos:
- repo: https://github.com/catalyst-team/codestyle
rev: 'v21.09.2'
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: 'v0.0.278'
hooks:
- id: catalyst-make-codestyle
args: [--line-length=89]
- repo: https://github.com/catalyst-team/codestyle
rev: 'v21.09.2'
hooks:
- id: catalyst-check-codestyle
args: [--line-length=89]

exclude: __init__.py
- id: ruff
args: [--fix]
104 changes: 104 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,104 @@
# CORL Contribution Guidelines

We welcome:

- Bug reports
- Pull requests for bug fixes
- Logs and documentation improvements
- New algorithms and datasets
- Better hyperparameters (backed by evidence)

## Contributing to the codebase

Contributing code is done through the standard GitHub workflow:

```commandline
git clone git@github.com:tinkoff-ai/CORL.git
cd CORL
pip install -r requirements/requirements_dev.txt
```

1. Fork this repo
2. Make a change and commit your code
3. Submit a pull request. Maintainers will review it and give feedback or request changes as needed

### Code style

The CI will run several checks on the new code pushed to the CORL repository.
These checks can also be run locally without waiting for the CI by following the steps below:
1. [install `pre-commit`](https://pre-commit.com/#install),
2. install the Git hooks by running `pre-commit install`.

Once those two steps are done, the Git hooks will run automatically on every new commit.
The Git hooks can also be run manually with `pre-commit run --all-files`, and
if needed they can be skipped (not recommended) with `git commit --no-verify`.
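
For reference, the full local workflow might look like this (assuming `pre-commit` is installed via pip):

```commandline
pip install pre-commit       # or see https://pre-commit.com/#install
pre-commit install           # register the Git hooks for this repository
pre-commit run --all-files   # run all checks manually
```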

We use [Ruff](https://github.com/astral-sh/ruff) as our main linter. If you want to see potential
problems before pre-commit runs, you can run `ruff check --diff .` to preview the linter's suggestions and the fixes it would apply.
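
For example, either of the following works locally (the second mirrors the CI job exactly):

```commandline
ruff check --diff .                      # show suggested fixes without applying them
ruff --config pyproject.toml --diff .    # the exact command the CI runs
```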

## Adding new algorithms

All new algorithms should go to `algorithms/contrib/offline` for purely
offline algorithms and to `algorithms/contrib/finetune` for offline-to-online algorithms.
We as a team try to keep the core as reliable and reproducible as possible,
but we may not have the resources to support all future algorithms.
Therefore, this separation is necessary, as we cannot guarantee that all
algorithms in `algorithms/contrib` exactly reproduce the results of their original publications.

Make sure your new code is properly documented and all references to the original implementations and papers are present (for example as in [Decision Transformer](algorithms/offline/dt.py)).
Please *explain all the tricks and possible differences from the original implementation in as much detail as possible*.
Keep in mind that this code may be used by other researchers. Make their lives easier!

### Considerations
While we welcome any algorithm, it is better to open an issue with a proposal first
so we can discuss the details. Unfortunately, not all algorithms are equally
easy to understand and reproduce. We may be able to offer some advice,
or, on the contrary, warn you that a particular algorithm would require too many
computational resources to fully reproduce, and that your effort is better spent elsewhere.

### Running benchmarks

Although you will have to do a hyperparameter search while reproducing the algorithm,
in the end we expect to see final configs in `configs/contrib/<algo_type>/<algo_name>/<dataset_name>.yaml` with the best hyperparameters for all
datasets considered. The configs should be in `yaml` format, with all hyperparameters sorted
in alphabetical order (see existing configs for inspiration).
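
For illustration only, such a config might look like the sketch below (the hyperparameter names and values here are hypothetical; copy the structure of an existing config instead):

```yaml
# configs/contrib/offline/my_algo/halfcheetah-medium-v2.yaml
batch_size: 256
buffer_size: 2000000
device: cuda
env_name: halfcheetah-medium-v2
gamma: 0.99
group: my_algo-halfcheetah-medium-v2-multiseed-v0
learning_rate: 0.0003
name: my_algo
```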

Use these conventions to name your runs in the configs:
1. `name: <algo_name>`
2. `group: <algo_name>-<dataset_name>-multiseed-v0`, increment version if needed
3. use our [\_\_post_init\_\_](https://github.com/tinkoff-ai/CORL/blob/962688b405f579a1ce6ec1b57e6369aaf76f9e69/algorithms/offline/awac.py#L48) implementation in your config dataclass
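
For illustration, a minimal config dataclass following these conventions might look like the sketch below (all fields besides `name` and `group` are hypothetical; see the linked `awac.py` for the actual implementation):

```python
import os
import uuid
from dataclasses import dataclass
from typing import Optional


@dataclass
class TrainConfig:
    # wandb run naming, following the conventions above
    name: str = "<algo_name>"
    group: str = "<algo_name>-<dataset_name>-multiseed-v0"
    # illustrative fields
    env_name: str = "halfcheetah-medium-v2"
    checkpoints_path: Optional[str] = None

    def __post_init__(self):
        # make each run name unique while keeping the group stable across seeds
        self.name = f"{self.name}-{self.env_name}-{str(uuid.uuid4())[:8]}"
        if self.checkpoints_path is not None:
            self.checkpoints_path = os.path.join(self.checkpoints_path, self.name)
```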

Since we are releasing wandb logs for all algorithms, you will need to submit multiseed (~4 seeds)
training runs to the `CORL` project in the wandb [corl-team](https://wandb.ai/corl-team) organization. We'll invite you there when the time comes.

We usually use wandb sweeps for this. You can use this example config (it works with pyrallis, which expects a `config_path` CLI argument):
```yaml
# sweep_config.yaml
entity: corl-team
project: CORL
program: algorithms/contrib/<algo_name>.py
method: grid
parameters:
config_path:
# algo_type is offline or finetune (see sections above)
values: [
"configs/contrib/<algo_type>/<algo_name>/<dataset_name_1>.yaml",
"configs/contrib/<algo_type>/<algo_name>/<dataset_name_2>.yaml",
"configs/contrib/<algo_type>/<algo_name>/<dataset_name_3>.yaml",
]
train_seed:
values: [0, 1, 2, 3]
```
Then proceed as usual: create a wandb sweep with `wandb sweep sweep_config.yaml`, then run agents with `wandb agent <sweep_id>`.
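
Assuming the sweep config above is saved as `sweep_config.yaml`:

```commandline
wandb sweep sweep_config.yaml   # prints the sweep id
wandb agent <sweep_id>          # launch an agent that pulls runs from the sweep
```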

Based on the results, you will need to create wandb reports so that the results are easier for other users to understand.
You can use any of the existing ones as an example (see [README.md](README.md)).

### Checklist

- [ ] Issue about the new algorithm is open
- [ ] Single-file implementation is added to `algorithms/contrib`
- [ ] PR has passed all the tests
- [ ] Evidence that the implementation reproduces the original results is provided
- [ ] Configs with the best hyperparameters for all datasets are added to `configs/contrib`
- [ ] Logs and reports for the best hyperparameters are submitted to our wandb organization
8 changes: 4 additions & 4 deletions algorithms/finetune/awac.py
@@ -1,9 +1,9 @@
from typing import Any, Dict, List, Optional, Tuple, Union
from copy import deepcopy
from dataclasses import asdict, dataclass
import os
import random
import uuid
from copy import deepcopy
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional, Tuple, Union

import d4rl
import gym
@@ -12,8 +12,8 @@
import torch
import torch.nn as nn
import torch.nn.functional
from tqdm import trange
import wandb
from tqdm import trange

TensorBatch = List[torch.Tensor]

32 changes: 16 additions & 16 deletions algorithms/finetune/cal_ql.py
@@ -1,22 +1,22 @@
# source: https://github.com/nakamotoo/Cal-QL/tree/main
# https://arxiv.org/pdf/2303.05479.pdf
from typing import Any, Dict, List, Optional, Tuple, Union
from copy import deepcopy
from dataclasses import asdict, dataclass
import os
from pathlib import Path
import random
import uuid
from copy import deepcopy
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import d4rl
import gym
import numpy as np
import pyrallis
import torch
from torch.distributions import Normal, TanhTransform, TransformedDistribution
import torch.nn as nn
import torch.nn.functional as F
import wandb
from torch.distributions import Normal, TanhTransform, TransformedDistribution

TensorBatch = List[torch.Tensor]

@@ -285,22 +285,22 @@ def get_return_to_go(dataset: Dict, env: gym.Env, config: TrainConfig) -> np.nda
ep_len += 1
is_last_step = (
(t == N - 1)
or ( # noqa
or (
np.linalg.norm(
dataset["observations"][t + 1] - dataset["next_observations"][t]
)
> 1e-6 # noqa
> 1e-6
)
or ep_len == env._max_episode_steps # noqa
or ep_len == env._max_episode_steps
)

if d or is_last_step:
discounted_returns = [0] * ep_len
prev_return = 0
if (
config.is_sparse_reward
and r # noqa
== env.ref_min_score * config.reward_scale + config.reward_bias # noqa
and r
== env.ref_min_score * config.reward_scale + config.reward_bias
):
discounted_returns = [r / (1 - config.discount)] * ep_len
else:
@@ -818,14 +818,14 @@ def _q_loss(
torch.exp(self.log_alpha_prime()), min=0.0, max=1000000.0
)
cql_min_qf1_loss = (
alpha_prime # noqa
* self.cql_alpha # noqa
* (cql_qf1_diff - self.cql_target_action_gap) # noqa
alpha_prime
* self.cql_alpha
* (cql_qf1_diff - self.cql_target_action_gap)
)
cql_min_qf2_loss = (
alpha_prime # noqa
* self.cql_alpha # noqa
* (cql_qf2_diff - self.cql_target_action_gap) # noqa
alpha_prime
* self.cql_alpha
* (cql_qf2_diff - self.cql_target_action_gap)
)

self.alpha_prime_optimizer.zero_grad()
22 changes: 11 additions & 11 deletions algorithms/finetune/cql.py
@@ -1,23 +1,23 @@
# source: https://github.com/young-geng/CQL/tree/934b0e8354ca431d6c083c4e3a29df88d4b0a24d
# STRONG UNDER-PERFORMANCE ON PART OF ANTMAZE TASKS. BUT IN IQL PAPER IT WORKS SOMEHOW
# https://arxiv.org/pdf/2006.04779.pdf
from typing import Any, Dict, List, Optional, Tuple, Union
from copy import deepcopy
from dataclasses import asdict, dataclass
import os
from pathlib import Path
import random
import uuid
from copy import deepcopy
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import d4rl
import gym
import numpy as np
import pyrallis
import torch
from torch.distributions import Normal, TanhTransform, TransformedDistribution
import torch.nn as nn
import torch.nn.functional as F
import wandb
from torch.distributions import Normal, TanhTransform, TransformedDistribution

TensorBatch = List[torch.Tensor]

@@ -729,14 +729,14 @@ def _q_loss(
torch.exp(self.log_alpha_prime()), min=0.0, max=1000000.0
)
cql_min_qf1_loss = (
alpha_prime # noqa
* self.cql_alpha # noqa
* (cql_qf1_diff - self.cql_target_action_gap) # noqa
alpha_prime
* self.cql_alpha
* (cql_qf1_diff - self.cql_target_action_gap)
)
cql_min_qf2_loss = (
alpha_prime # noqa
* self.cql_alpha # noqa
* (cql_qf2_diff - self.cql_target_action_gap) # noqa
alpha_prime
* self.cql_alpha
* (cql_qf2_diff - self.cql_target_action_gap)
)

self.alpha_prime_optimizer.zero_grad()
10 changes: 5 additions & 5 deletions algorithms/finetune/iql.py
@@ -1,23 +1,23 @@
# source: https://github.com/gwthomas/IQL-PyTorch
# https://arxiv.org/pdf/2110.06169.pdf
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import copy
from dataclasses import asdict, dataclass
import os
from pathlib import Path
import random
import uuid
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import d4rl
import gym
import numpy as np
import pyrallis
import torch
from torch.distributions import Normal
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR
import wandb
from torch.distributions import Normal
from torch.optim.lr_scheduler import CosineAnnealingLR

TensorBatch = List[torch.Tensor]

8 changes: 4 additions & 4 deletions algorithms/finetune/spot.py
@@ -1,12 +1,12 @@
# source: https://github.com/thuml/SPOT/tree/58c591dc48fbd9ff632b7494eab4caf778e86f4a
# https://arxiv.org/pdf/2202.06239.pdf
from typing import Any, Dict, List, Optional, Tuple, Union
import copy
from dataclasses import asdict, dataclass
import os
from pathlib import Path
import random
import uuid
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import d4rl
import gym
@@ -456,7 +456,7 @@ def forward(self, state: torch.Tensor, action: torch.Tensor) -> torch.Tensor:
return self.net(sa)


class SPOT: # noqa
class SPOT:
def __init__(
self,
max_action: float,