
Commit 1dbc8b4

'initial commit'
1 parent c9d847e commit 1dbc8b4

16 files changed · +2870 −0 lines

.github/workflows/pre-commit.yaml

Lines changed: 16 additions & 0 deletions
```yaml
name: pre-commit-codestyle

concurrency:
  group: ${{ github.workflow }}-${{ github.event.number }}-${{ github.event.ref }}
  cancel-in-progress: true

on: [pull_request]

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
      - uses: pre-commit/action@v3.0.1
```
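The workflow only wires pre-commit into CI; the same hooks can be run locally before pushing. A minimal sketch using the standard pre-commit CLI (these commands are not part of this commit):

```bash
# Install the pre-commit CLI and run every configured hook once.
pip install pre-commit
pre-commit install          # optional: run hooks automatically on each commit
pre-commit run --all-files  # one-off run across the whole repository
```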

.github/workflows/prettier.yml

Lines changed: 48 additions & 0 deletions
```yaml
name: Prettier code formatter

on:
  pull_request:
    branches:
      - master
      - main
  push:
    branches:
      - master
      - main

jobs:
  check:
    # available images: https://github.com/actions/runner-images#available-images
    runs-on: ubuntu-latest
    steps:
      - name: Checkout 🛎️
        uses: actions/checkout@v4
      - name: Setup Node.js ⚙️
        uses: actions/setup-node@v4
      - name: Install Prettier 💾
        run: npm install --save-dev --save-exact prettier @shopify/prettier-plugin-liquid
      - name: Prettier Check 🔎
        id: prettier
        run: npx prettier . --check
      - name: Create diff 📝
        # https://docs.github.com/en/actions/learn-github-actions/expressions#failure
        if: ${{ failure() }}
        run: |
          npx prettier . --write
          git diff -- . ':(exclude)package-lock.json' ':(exclude)package.json' > diff.txt
          npm install -g diff2html-cli
          diff2html -i file -s side -F diff.html -- diff.txt
      - name: Upload html diff ⬆️
        id: artifact-upload
        if: ${{ failure() && steps.prettier.conclusion == 'failure' }}
        uses: actions/upload-artifact@v4
        with:
          name: HTML Diff
          path: diff.html
          retention-days: 7
      - name: Dispatch information to repository 🗣️
        if: ${{ failure() && steps.prettier.conclusion == 'failure' && github.event_name == 'pull_request' }}
        uses: peter-evans/repository-dispatch@v2
        with:
          event-type: prettier-failed-on-pr
          client-payload: '{"pr_number": "${{ github.event.number }}", "artifact_url": "${{ steps.artifact-upload.outputs.artifact-url }}", "run_id": "${{ github.run_id }}"}'
```

.gitignore

Lines changed: 5 additions & 0 deletions
```diff
@@ -160,3 +160,8 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+data/NMT/
+bfm/Experiments/*
+bfm/model_weights/*
+wandb/*
+0_misc/*
```

.pre-commit-config.yaml

Lines changed: 64 additions & 0 deletions
```yaml
default_language_version:
  python: python3

ci:
  autofix_commit_msg: |
    [pre-commit.ci] auto fixes from pre-commit.com hooks
  autofix_prs: true
  autoupdate_branch: "master"
  autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
  autoupdate_schedule: quarterly
  skip: []
  submodules: false

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: check-yaml
      - id: check-json
      - id: check-executables-have-shebangs
      - id: check-toml
      - id: check-docstring-first
      - id: check-added-large-files
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: check-case-conflict
      - id: mixed-line-ending
      - id: forbid-new-submodules
      - id: pretty-format-json
        args: ["--autofix", "--no-sort-keys", "--indent=4"]

  - repo: https://github.com/charliermarsh/ruff-pre-commit
    rev: v0.4.8
    hooks:
      - id: ruff
        args: ["--ignore=E402,E501,F401", "--fix"] # optionally: --exit-non-zero-on-fix
      - id: ruff
        name: ruff lint data notebooks
        args: ["--fix", "--preview", "--select=NPY201"]
      - id: ruff
        args: ["check", "--select", "I", "--fix"]
      - id: ruff-format
        types_or: [python, pyi, jupyter]

  - repo: https://github.com/codespell-project/codespell
    rev: v2.3.0
    hooks:
      - id: codespell
        args:
          - --ignore-words-list=metadat,splitted,meaned,wil,whats,additionals,alle,alot,bund,currenty,datas,farenheit,falsy,fo,haa,hass,iif,incomfort,ines,ist,nam,nd,pres,pullrequests,resset,rime,ser,serie,te,technik,ue,unsecure,withing,zar,MAPE,mape
          - --skip="./.*,*.csv,*.json,*.ambr"
          - --quiet-level=2
        exclude_types: [csv, json]
        exclude: ^tests/|generated/^.github

  - repo: https://github.com/asottile/blacken-docs
    rev: 1.16.0
    hooks:
      - id: blacken-docs
        exclude: ^.github
```
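With this config in place, individual hooks can also be invoked by id; a quick sketch using standard pre-commit commands (not part of this commit):

```bash
# Run a single hook from this config against the whole repo.
pre-commit run codespell --all-files
# Bump each repo's rev to its latest tag (pre-commit.ci does this quarterly per the ci: block).
pre-commit autoupdate
```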

README.md

Lines changed: 76 additions & 0 deletions
# About

Code for ["General-Purpose Brain Foundation Models for Time-Series Neuroimaging Data"](https://openreview.net/forum?id=HwDQH0r37I)

# Getting Started

## 0. Install the requirements

To install the requirements, run the following command:

```bash
pip install -r requirements.txt
```

## 1. Download the data

Download the NMT data from [here](https://ilabel.ai/datasets/Nust-Millitary-Hospital-TUKl-NMT-EEG-Dataset/NMT-Scalp-EEG.zip) and extract it into the `data` folder, or use the following commands:

```bash
wget https://ilabel.ai/datasets/Nust-Millitary-Hospital-TUKl-NMT-EEG-Dataset/NMT-Scalp-EEG.zip
unzip NMT-Scalp-EEG.zip -d data
```

Alternatively, download it from Google Drive:

```bash
gdown 'https://drive.google.com/uc?id=1jD_AcmfoaIfkOiO5lSU4J6IxHZtalnTk'
unzip NMT.zip -d data/NMT/
```

## 2. Preprocess the data

To preprocess the data, run the following command:

```bash
python ./data/preprocess.py \
    --dataset nmt \
    --start_range 0 \
    --end_range 500 \
    --exp_path ./data/NMT/NMT_dl/ \
    --nmt_raw_path ./data/NMT/nmt_scalp_eeg_dataset/
```

This preprocesses the data and saves it as `.arrow` files in the `data/NMT/NMT_dl/nmt_dl/` folder.

## 3. Train the model

To train the model, run the following command:

```bash
accelerate launch bfm/train/train.py \
    --config bfm/configs/bfm-t5-base-nmt.yaml \
    --experiment-name "bfm-base" \
    --wandb-mode online \
    --wandb-entity <your_wandb_entity> \
    --model-id google/t5-efficient-base \
    --seed 6 \
    --learning-rate 0.001 \
    --per-device-train-batch-size 32 \
    --no-random-init \
    --n-gpus 4 \
    --max-steps 2000
```

This trains the model on the NMT dataset using the T5-base model. You can modify the config file to use a different model or dataset.

## 4. Evaluate the model

To evaluate the model, run the following command:

```bash
CUDA_VISIBLE_DEVICES=0 python bfm/evaluate/evaluate.py \
    --config_path "bfm/configs/bfm-inference.yaml" \
    --directory_path "./bfm/Experiments/bfm-base_nmt" \
    --seed 2024 \
    --device "cuda"
```

**Note:** You can also use `data/download_moabb_datasets.py` to download the MOABB datasets, then `data/preprocess_moabb.py` to preprocess them and evaluate the model on them.

# Citation

If you find this code useful, please consider citing our paper:

```bibtex
@inproceedings{
    bayazi2024generalpurpose,
    title={General-Purpose Brain Foundation Models for Time-Series Neuroimaging Data},
    author={Mohammad Javad Darvishi Bayazi and Hena Ghonia and Roland Riachi and Bruno Aristimunha and Arian Khorasani and Md Rifat Arefin and Amin Darabi and Guillaume Dumas and Irina Rish},
    booktitle={NeurIPS Workshop on Time Series in the Age of Large Models},
    year={2024},
    url={https://openreview.net/forum?id=HwDQH0r37I}
}
```

bfm/configs/bfm-inference-moabb.yaml

Lines changed: 21 additions & 0 deletions
```yaml
- name: BNCI2014_001
  test_path: ./data/moabb/BNCI2014_001_dl/train/
  num_rolls: 1
- name: BNCI2014_004
  test_path: ./data/moabb/BNCI2014_004_dl/train/
  num_rolls: 1
- name: BNCI2015_001
  test_path: ./data/moabb/BNCI2015_001_dl/train/
  num_rolls: 1
- name: Weibo2014
  test_path: ./data/moabb/Weibo2014_dl/train/
  num_rolls: 1
- name: Cho2017
  test_path: ./data/moabb/Cho2017_dl/train/
  num_rolls: 1
- name: Liu2024
  test_path: ./data/moabb/Liu2024_dl/train/
  num_rolls: 1
- name: PhysionetMI
  test_path: ./data/moabb/PhysionetMI_dl/train/
  num_rolls: 1
```
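Assuming `evaluate.py` accepts any dataset-list config of this shape (the commit does not show its argument parsing), evaluating on the MOABB sets would look like the README's evaluate command with the config path swapped:

```bash
# Hypothetical invocation: the README's evaluate command pointed at this config.
CUDA_VISIBLE_DEVICES=0 python bfm/evaluate/evaluate.py \
    --config_path "bfm/configs/bfm-inference-moabb.yaml" \
    --directory_path "./bfm/Experiments/bfm-base_nmt" \
    --seed 2024 \
    --device "cuda"
```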

bfm/configs/bfm-inference.yaml

Lines changed: 3 additions & 0 deletions
```yaml
- name: nmt_oodomain
  test_path: ./data/NMT/NMT_dl/nmt_dl/test/
  num_rolls: 1
```

bfm/configs/bfm-t5-base-nmt.yaml

Lines changed: 40 additions & 0 deletions
```yaml
training_data_paths:
  - ./data/NMT/NMT_dl/nmt_dl/train/
validation_data_paths:
  - ./data/NMT/NMT_dl/nmt_dl/val/
dataset: "nmt"
project_dir: "./bfm/"
experiment_name: "test_experiment_v0"
wandb_mode: "offline"
n_gpus: 8
wandb_entity: "brain_fomo"
wandb_project:
context_length: 512
prediction_length: 64
min_past: 60
max_steps: 200_000
save_steps: 50
log_steps: 50
per_device_train_batch_size: 32
learning_rate: 0.001
optim: adamw_torch
num_samples: 20
shuffle_buffer_length: 100
gradient_accumulation_steps: 1
model_id: google/t5-efficient-base
model_type: seq2seq
random_init: true
tie_embeddings: true
output_dir: ./output/
tf32: true
torch_compile: true
tokenizer_class: "MeanScaleUniformBins"
tokenizer_kwargs:
  low_limit: -15.0
  high_limit: 15.0
n_tokens: 4096
lr_scheduler_type: linear
warmup_ratio: 0.0
dataloader_num_workers: 1
max_missing_prop: 0.9
use_eos_token: true
```
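The README's training command passes flags such as `--learning-rate` and `--max-steps` alongside `--config`, which suggests command-line flags override the matching YAML defaults above; a sketch under that assumption:

```bash
# Assumed behavior: CLI flags take precedence over the YAML keys they mirror.
accelerate launch bfm/train/train.py \
    --config bfm/configs/bfm-t5-base-nmt.yaml \
    --wandb-mode offline \
    --max-steps 2000
```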
