Skip to content

Commit

Permalink
chore: add Beam pipeline requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
tskir committed Nov 23, 2023
1 parent f2f3559 commit 092dee0
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 3 deletions.
3 changes: 2 additions & 1 deletion src/beam/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ python -m eqtl_catalogue \
--project open-targets-genetics-dev \
--staging_location gs://genetics-portal-dev-staging/beam \
--template_location gs://genetics_etl_python_playground/beam/eqtl_catalogue \
- --region europe-west1
+ --region europe-west1 \
+ --requirements_file requirements.txt
```

To run a pipeline:
Expand Down
4 changes: 2 additions & 2 deletions src/beam/eqtl_catalogue.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def process(
# Skip header.
continue
data = row.split("\t")
- if i == 100000:
+ if i == 1000000:
break
# Perform actions depending on the chromosome.
chromosome = data[chromosome_index]
Expand Down Expand Up @@ -242,7 +242,7 @@ def run_pipeline() -> None:
with beam.Pipeline(options=PipelineOptions()) as pipeline:
(
pipeline
- | "List input files" >> beam.Create(get_input_files()[:1])
+ | "List input files" >> beam.Create(get_input_files())
| "Parse data" >> beam.ParDo(ParseData())
| "Write to Parquet" >> beam.ParDo(WriteData())
)
Expand Down
3 changes: 3 additions & 0 deletions src/beam/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pandas
fsspec
gcsfs

0 comments on commit 092dee0

Please sign in to comment.