From b27473fc7ab523278ed7d0ea57d9525a447e28b8 Mon Sep 17 00:00:00 2001 From: JJstin <66219847+JJstin@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:00:21 -0400 Subject: [PATCH] instructions typo fix and deps update (#38) --- docs/_docs/home.md | 10 +++++----- modeling/README.md | 10 +++++----- modeling/requirements.txt | 5 +++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/_docs/home.md b/docs/_docs/home.md index 9e49f44..3af93ef 100644 --- a/docs/_docs/home.md +++ b/docs/_docs/home.md @@ -88,7 +88,7 @@ demo_names = wl.utils.load_demo_names_in_split(split_path, split='train') demo_names = ['saabwsg', 'ygprzve', 'iqaazif'] # 3 random demo from valid # Load the demonstrations -demos = [wl.Demonstration(name, base_dir=base_dir) for name in names] +demos = [wl.Demonstration(name, base_dir=base_dir) for name in demo_names] # Select a demo to work with demo = demos[0] @@ -183,13 +183,13 @@ from weblinx.processing import load_candidate_elements # Download the candidates elements generated by the MiniLM-L6-dmr model snapshot_download( - repo_id="McGill-NLP/WebLINX-full", - repo_type="dataset", - allow_patterns="candidates/*.jsonl", + repo_id="McGill-NLP/WebLINX-full", + repo_type="dataset", + allow_patterns="candidates/*.jsonl", local_dir="./wl_data/" ) -split = "train" # or valid, test, test_geo, test_vis, test_web, test_cat +split = "train" # or valid, test, test_geo, test_vis, test_web, test_cat candidates_path = f"./wl_data/candidates/{split}.jsonl" # Access the candidates candidates = load_candidate_elements(path=candidates_path) diff --git a/modeling/README.md b/modeling/README.md index 8000bfb..d783263 100644 --- a/modeling/README.md +++ b/modeling/README.md @@ -14,9 +14,9 @@ snapshot_download( # candidates files snapshot_download( - repo_id="McGill-NLP/WebLINX-full", - repo_type="dataset", - allow_patterns="candidates/*.jsonl", + repo_id="McGill-NLP/WebLINX-full", + repo_type="dataset", + allow_patterns="candidates/*.jsonl", local_dir="./wl_data/" ) ``` @@ -72,7 +72,7 @@ ln -s /location/of/your/full/data /location/of/project/weblinx/modeling/wl_data For example, if your data is located at `/mnt/research/scratch/users/jdoe/WebLINX-full` but your cloned `weblinx` repository is at `~/dev/weblinx`, then you'd run: ```bash -ln -s /mnt/research/scratch/users/jdoe/WebLINX-full ~/dev/weblinx/modeling/wl_data +ln -s /mnt/research/scratch/users/jdoe/WebLINX-full/* ~/dev/weblinx/modeling/wl_data ``` Which corresponds to the `data.base_dir` specified in `config.yml`, which is `"${project_dir}/wl_data/demonstrations/"`. @@ -122,7 +122,7 @@ The `scores.jsonl` and `results.json` files will be saved at the `cfg.eval.resul # Change the following paths to match your setup orig_dir="/path/to/weblinx/modeling/results/dmr/sentence-transformers/all-MiniLM-L6-v2" -# This is the directory where the candidates are stored +# This is the directory where the candidates are stored new_dir="/path/to/wl_data/candidates" # You need to move the train split if you plan to use it for training the action model diff --git a/modeling/requirements.txt b/modeling/requirements.txt index baf8273..9f740c3 100644 --- a/modeling/requirements.txt +++ b/modeling/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.35.0 # Future version may break the code, upgrade with caution +transformers==4.42.3 # Future version may break the code, upgrade with caution. Previous stable version was 4.35.0 lxml numpy datasets @@ -19,4 +19,5 @@ coloredlogs sacrebleu bert-score packaging -ninja \ No newline at end of file +ninja +huggingface-hub>=0.23.4, <0.24 \ No newline at end of file