File tree Expand file tree Collapse file tree 7 files changed +1697
-12
lines changed Expand file tree Collapse file tree 7 files changed +1697
-12
lines changed Original file line number Diff line number Diff line change 1
# Docker container with Poetry for Python package management
FROM python:3.11-slim

# Install Poetry (no pip cache is kept, to reduce image size)
RUN pip install --no-cache-dir poetry

# Set the working directory
WORKDIR /app

# Install build tools for Snakemake (gcc, make, etc.).
# Recommended packages are skipped and the apt lists are removed in the same
# layer so they do not end up in the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy the pyproject.toml file first so the dependency layer below can be
# cached independently of the application sources
COPY pyproject.toml /app/

# Install the dependencies
RUN poetry install --no-root

# Copy the rest of the application files
COPY . /app

# Install the package
RUN poetry install

# Install Snakemake using Poetry
# NOTE(review): `poetry add` rewrites pyproject.toml inside the image at build
# time; consider declaring snakemake as a dependency in pyproject.toml instead.
RUN poetry add snakemake

# Set the entry point for the container
ENTRYPOINT ["poetry", "run"]

# Default argument appended to the entry point (i.e. `poetry run help`)
CMD ["help"]
Original file line number Diff line number Diff line change
1
# TODO - Refactor to input args to the Snakemake file

# Workflow IDs covered by this run: 1 through 10 inclusive.
WORKFLOW_IDS = range(1, 11)

# Version identifiers paired with every workflow ID.
VERSIONS = ["1"]

# Directory used for the per-workflow metadata files.
OUTPUT_DIRS = "data"

# Final target of the workflow (see rule `all`).
MERGED_FILE = "merged.ttl"
6
+
7
+
8
def list_expected_files():
    """Return the expected metadata path for every workflow/version pair.

    Paths are built from the module-level ``OUTPUT_DIRS``, ``WORKFLOW_IDS``
    and ``VERSIONS`` settings, ordered by workflow ID and then by version.
    """
    return [
        f"{OUTPUT_DIRS}/{workflow_id}_{version}_ro-crate-metadata.json"
        for workflow_id in WORKFLOW_IDS
        for version in VERSIONS
    ]
14
+
15
# Default target: requesting it requires the merged file to be built.
rule all:
    input:
        MERGED_FILE
18
+
19
# Download the RO-Crate metadata and record which files were created.
# NOTE(review): nothing in this rule visibly writes created_files.json;
# presumably check_outputs.py does -- confirm.
rule source_ro_crates:
    output:
        "created_files.json"
    params:
        # Inclusive "first-last" ID range derived from WORKFLOW_IDS so the
        # command line cannot drift from the configured IDs. Assumes
        # WORKFLOW_IDS is a non-empty, contiguous, ascending sequence.
        workflow_ids=f"{WORKFLOW_IDS[0]}-{WORKFLOW_IDS[-1]}"
    shell:
        """
        # Create the output directory if it doesn't exist:
        mkdir -p {OUTPUT_DIRS}

        # Run the source_crates script to download the RO Crate metadata:
        python workflowhub_graph/source_crates.py --workflow-ids {params.workflow_ids} --prod --all-versions

        # After sourcing, check which files were actually created:
        python workflowhub_graph/check_outputs.py --workflow-ids {params.workflow_ids} --versions {VERSIONS} --output-dir {OUTPUT_DIRS}
        """
33
+
34
# Print the contents of the created-files list.
# The rule declares no output, so it only runs when requested by name.
rule report_created_files:
    input:
        "created_files.json"
    shell:
        """
        echo "Files created:"
        # Use the declared input so the path is not duplicated here.
        cat {input}
        """
42
+
43
# Merge the sourced JSON-LD metadata files into a single Turtle file.
rule merge_files:
    input:
        "created_files.json"
    output:
        MERGED_FILE
    run:
        import json

        # Load the list of created files from the declared input:
        with open(input[0]) as f:
            created_files = json.load(f)

        # If no files are available to merge, raise an exception:
        if not created_files:
            raise ValueError(f"No files to merge in {OUTPUT_DIRS} directory.")

        # Merge the JSON-LD files into a single RDF graph and output as a TTL file.
        # NOTE(review): the glob matches every JSON file in the directory, not
        # only the ones listed in created_files.json -- confirm this is intended.
        shell(f"""
        python workflowhub_graph/merge.py {output[0]} -p "{OUTPUT_DIRS}/*.json"
        """)
You can’t perform that action at this time.
0 commit comments