From 09c7df27385b9b7a90cde05c508bfbd722706703 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 30 Sep 2024 11:55:03 -0400 Subject: [PATCH 1/3] fixed #76 --- eido/_version.py | 2 +- eido/validation.py | 4 ++++ tests/conftest.py | 16 +++++++++++++--- .../project_config.yaml | 0 .../sample_table.csv | 0 .../subsample_table.csv | 0 .../peps/value_check_pep/project_config.yaml | 6 ++++++ tests/data/peps/value_check_pep/sample_table.csv | 7 +++++++ tests/data/schemas/value_check_schema.yaml | 16 ++++++++++++++++ tests/test_validations.py | 13 +++++++++---- 10 files changed, 56 insertions(+), 8 deletions(-) rename tests/data/peps/{test_file_existence => test_file_existing}/project_config.yaml (100%) rename tests/data/peps/{test_file_existence => test_file_existing}/sample_table.csv (100%) rename tests/data/peps/{test_file_existence => test_file_existing}/subsample_table.csv (100%) create mode 100644 tests/data/peps/value_check_pep/project_config.yaml create mode 100644 tests/data/peps/value_check_pep/sample_table.csv create mode 100644 tests/data/schemas/value_check_schema.yaml diff --git a/eido/_version.py b/eido/_version.py index d31c31e..788da1f 100644 --- a/eido/_version.py +++ b/eido/_version.py @@ -1 +1 @@ -__version__ = "0.2.3" +__version__ = "0.2.4" diff --git a/eido/validation.py b/eido/validation.py index e409a13..e75d7e6 100644 --- a/eido/validation.py +++ b/eido/validation.py @@ -43,6 +43,10 @@ def _validate_object(obj: Mapping, schema: Union[str, dict], sample_name_colname instance_name = error.instance[sample_name_colname] except KeyError: instance_name = "project" + except TypeError: + instance_name = obj["samples"][error.absolute_path[1]][ + sample_name_colname + ] errors_by_type[error.message].append( { "type": error.message, diff --git a/tests/conftest.py b/tests/conftest.py index 905591e..eb931d9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -124,10 +124,20 @@ def save_result_mock(mocker): @pytest.fixture -def test_file_existence_schema(schemas_path): +def test_file_existing_schema(schemas_path): return os.path.join(schemas_path, "schema_test_file_exist.yaml") @pytest.fixture -def test_file_existance_pep(peps_path): - return os.path.join(peps_path, "test_file_existence", "project_config.yaml") +def test_file_existing_pep(peps_path): + return os.path.join(peps_path, "test_file_existing", "project_config.yaml") + + +@pytest.fixture +def test_schema_value_check(schemas_path): + return os.path.join(schemas_path, "value_check_schema.yaml") + + +@pytest.fixture +def test_file_value_check(peps_path): + return os.path.join(peps_path, "value_check_pep", "project_config.yaml") diff --git a/tests/data/peps/test_file_existence/project_config.yaml b/tests/data/peps/test_file_existing/project_config.yaml similarity index 100% rename from tests/data/peps/test_file_existence/project_config.yaml rename to tests/data/peps/test_file_existing/project_config.yaml diff --git a/tests/data/peps/test_file_existence/sample_table.csv b/tests/data/peps/test_file_existing/sample_table.csv similarity index 100% rename from tests/data/peps/test_file_existence/sample_table.csv rename to tests/data/peps/test_file_existing/sample_table.csv diff --git a/tests/data/peps/test_file_existence/subsample_table.csv b/tests/data/peps/test_file_existing/subsample_table.csv similarity index 100% rename from tests/data/peps/test_file_existence/subsample_table.csv rename to tests/data/peps/test_file_existing/subsample_table.csv diff --git a/tests/data/peps/value_check_pep/project_config.yaml b/tests/data/peps/value_check_pep/project_config.yaml new file mode 100644 index 0000000..66c4380 --- /dev/null +++ b/tests/data/peps/value_check_pep/project_config.yaml @@ -0,0 +1,6 @@ +description: None +name: encode_prj +pep_version: 2.0.0 +project_name: value_check_pep +sample_table: sample_table.csv +subsample_table: [] diff --git a/tests/data/peps/value_check_pep/sample_table.csv b/tests/data/peps/value_check_pep/sample_table.csv new file mode 100644 index 0000000..cefc2aa --- /dev/null +++ b/tests/data/peps/value_check_pep/sample_table.csv @@ -0,0 +1,7 @@ +sample_name,file_name,genome,assay,cell_line,target,format_type +encode_4,ENCFF452DAM.bed.gz,hg38,Histone ChIP-seq,skeletal muscle myoblast,H3K36me3,narrowPeak +encode_20,ENCFF121AXG.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak +encode_21,ENCFF710ECJ.bed.gz,hg38,DNase-seq,RPMI7951,,broadPeak +encode_22,ENCFF945FZN.bed.gz,hg38,DNase-seq,RPMI7951,,narrowPeak +encode_23,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak +encode_24,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak1 diff --git a/tests/data/schemas/value_check_schema.yaml b/tests/data/schemas/value_check_schema.yaml new file mode 100644 index 0000000..fb2352d --- /dev/null +++ b/tests/data/schemas/value_check_schema.yaml @@ -0,0 +1,16 @@ +description: bedboss run-all pep schema +properties: + samples: + items: + properties: + format_type: + description: whether the regions are narrow (transcription factor implies + narrow, histone mark implies broad peaks) + enum: + - narrowPeak + - broadPeak + type: string + type: object + type: array +required: +- samples diff --git a/tests/test_validations.py b/tests/test_validations.py index a217ca4..6bea0a4 100644 --- a/tests/test_validations.py +++ b/tests/test_validations.py @@ -1,7 +1,6 @@ import urllib import pytest -from jsonschema.exceptions import ValidationError from peppy import Project from peppy.utils import load_yaml @@ -140,9 +139,15 @@ def test_validate_detects_invalid(self, schema_invalid_file_path, remote_pep_cfg ) def test_validate_file_existance( - self, test_file_existance_pep, test_file_existence_schema + self, test_file_existing_pep, test_file_existing_schema ): - schema_path = test_file_existence_schema - prj = Project(test_file_existance_pep) + schema_path = test_file_existing_schema + prj = Project(test_file_existing_pep) with pytest.raises(PathAttrNotFoundError): validate_input_files(prj, schema_path) + + def test_validation_values(self, test_schema_value_check, test_file_value_check): + schema_path = test_schema_value_check + prj = Project(test_file_value_check) + with pytest.raises(EidoValidationError): + validate_project(project=prj, schema=schema_path) From 5fd4867945e194b11e2aa6ab11ad2dc630be13af Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 30 Sep 2024 12:28:06 -0400 Subject: [PATCH 2/3] fixed #70 --- eido/conversion_plugins.py | 3 +-- eido/output_formatters.py | 2 +- tests/conftest.py | 5 +++++ .../multiple_subsamples/project_config.yaml | 14 ++++++++++++++ .../peps/multiple_subsamples/sample_table.csv | 5 +++++ .../multiple_subsamples/subsample_table1.csv | 6 ++++++ .../multiple_subsamples/subsample_table2.csv | 6 ++++++ tests/test_conversions.py | 18 +++++++++++++++++- 8 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 tests/data/peps/multiple_subsamples/project_config.yaml create mode 100644 tests/data/peps/multiple_subsamples/sample_table.csv create mode 100644 tests/data/peps/multiple_subsamples/subsample_table1.csv create mode 100644 tests/data/peps/multiple_subsamples/subsample_table2.csv diff --git a/eido/conversion_plugins.py b/eido/conversion_plugins.py index 01dd550..0f62395 100644 --- a/eido/conversion_plugins.py +++ b/eido/conversion_plugins.py @@ -42,8 +42,7 @@ def yaml_pep_filter(p, **kwargs) -> Dict[str, str]: """ from yaml import dump - data = p.config.to_dict() - return {"project": dump(data, default_flow_style=False)} + return {"project": dump(p.config, default_flow_style=False)} def csv_pep_filter(p, **kwargs) -> Dict[str, str]: diff --git a/eido/output_formatters.py b/eido/output_formatters.py index 8d20a12..f9d2486 100644 --- a/eido/output_formatters.py +++ b/eido/output_formatters.py @@ -110,7 +110,7 @@ def _convert_sample_to_row( ): value = sample[attribute][sample_index] else: - value = sample[attribute] + value = sample.get(attribute) sample_row.append(value or "") diff --git a/tests/conftest.py b/tests/conftest.py index eb931d9..8e47b94 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -141,3 +141,8 @@ def test_schema_value_check(schemas_path): @pytest.fixture def test_file_value_check(peps_path): return os.path.join(peps_path, "value_check_pep", "project_config.yaml") + + +@pytest.fixture +def test_multiple_subs(peps_path): + return os.path.join(peps_path, "multiple_subsamples", "project_config.yaml") diff --git a/tests/data/peps/multiple_subsamples/project_config.yaml b/tests/data/peps/multiple_subsamples/project_config.yaml new file mode 100644 index 0000000..1063907 --- /dev/null +++ b/tests/data/peps/multiple_subsamples/project_config.yaml @@ -0,0 +1,14 @@ +pep_version: "2.1.0" +sample_table: sample_table.csv +subsample_table: + - subsample_table1.csv + - subsample_table2.csv + + +sample_modifiers: + append: + local_files: LOCAL + derive: + attributes: [local_files] + sources: + LOCAL: "../data/{file_path}" diff --git a/tests/data/peps/multiple_subsamples/sample_table.csv b/tests/data/peps/multiple_subsamples/sample_table.csv new file mode 100644 index 0000000..1137443 --- /dev/null +++ b/tests/data/peps/multiple_subsamples/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,protocol,identifier +frog_1,anySampleType,frog1 +frog_2,anySampleType,frog2 +frog_3,anySampleType,frog3 +frog_4,anySampleType,frog4 diff --git a/tests/data/peps/multiple_subsamples/subsample_table1.csv b/tests/data/peps/multiple_subsamples/subsample_table1.csv new file mode 100644 index 0000000..1d4f955 --- /dev/null +++ b/tests/data/peps/multiple_subsamples/subsample_table1.csv @@ -0,0 +1,6 @@ +sample_name,file_path,subsample_name +frog_1,file/a.txt,a +frog_1,file/b.txt,b +frog_1,file/c.txt,c +frog_2,file/a.txt,a +frog_2,file/b.txt,b diff --git a/tests/data/peps/multiple_subsamples/subsample_table2.csv b/tests/data/peps/multiple_subsamples/subsample_table2.csv new file mode 100644 index 0000000..2e7f72b --- /dev/null +++ b/tests/data/peps/multiple_subsamples/subsample_table2.csv @@ -0,0 +1,6 @@ +sample_name,random_string,subsample_name +frog_1,x_x,x +frog_1,y_y,y +frog_1,z_z,z +frog_2,xy_yx,xy +frog_2,xx_xx,xx diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 179020e..ba6bffe 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -1,4 +1,10 @@ -from eido.conversion import * +from eido.conversion import ( + run_filter, + get_available_pep_filters, + pep_conversion_plugins, + convert_project, +) +import peppy class TestConversionInfrastructure: @@ -74,3 +80,13 @@ def test_eido_csv_filter_filters_nextflow_taxprofiler_input_correctly( assert save_result_mock.called assert conv_result == {"samples": output_pep_nextflow_taxprofiler} + + def test_multiple_subsamples(self, test_multiple_subs): + project = peppy.Project(test_multiple_subs) + + # ff = convert_project(project, "yaml", ) + ff = convert_project( + project, + "csv", + ) + ff From 0f8f59dafbb58c03f0cded983544a67ba75b643c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 30 Sep 2024 15:01:38 -0400 Subject: [PATCH 3/3] more conversion tests --- eido/conversion_plugins.py | 2 +- requirements/requirements-all.txt | 2 +- .../multiple_subsamples/project_config.yaml | 7 +++++- .../peps/multiple_subsamples/sample_table.csv | 2 +- .../multiple_subsamples/subsample_table1.csv | 2 +- .../multiple_subsamples/subsample_table2.csv | 2 +- tests/test_conversions.py | 22 +++++++++++++++---- 7 files changed, 29 insertions(+), 10 deletions(-) diff --git a/eido/conversion_plugins.py b/eido/conversion_plugins.py index 0f62395..fe45805 100644 --- a/eido/conversion_plugins.py +++ b/eido/conversion_plugins.py @@ -69,7 +69,7 @@ def processed_pep_filter(p, **kwargs) -> Dict[str, str]: samples_as_objects = kwargs.get("samples_as_objects") subsamples_as_objects = kwargs.get("subsamples_as_objects") - prj_repr = p.config.to_dict() + prj_repr = p.config return { "project": str(prj_repr), diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 5e81881..7c16b30 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -2,5 +2,5 @@ importlib-metadata; python_version < '3.10' jsonschema>=3.0.1 logmuse>=0.2.5 pandas -peppy>=0.40.6 +peppy>=0.40.7 ubiquerg>=0.5.2 diff --git a/tests/data/peps/multiple_subsamples/project_config.yaml b/tests/data/peps/multiple_subsamples/project_config.yaml index 1063907..e0e580b 100644 --- a/tests/data/peps/multiple_subsamples/project_config.yaml +++ b/tests/data/peps/multiple_subsamples/project_config.yaml @@ -4,11 +4,16 @@ subsample_table: - subsample_table1.csv - subsample_table2.csv - sample_modifiers: append: local_files: LOCAL + genome: "fg" derive: attributes: [local_files] sources: LOCAL: "../data/{file_path}" + imply: + - if: + identifier: "frog1" + then: + genome: "frog_frog" diff --git a/tests/data/peps/multiple_subsamples/sample_table.csv b/tests/data/peps/multiple_subsamples/sample_table.csv index 1137443..7c06204 100644 --- a/tests/data/peps/multiple_subsamples/sample_table.csv +++ b/tests/data/peps/multiple_subsamples/sample_table.csv @@ -1,4 +1,4 @@ -sample_name,protocol,identifier +sample_id,protocol,identifier frog_1,anySampleType,frog1 frog_2,anySampleType,frog2 frog_3,anySampleType,frog3 diff --git a/tests/data/peps/multiple_subsamples/subsample_table1.csv b/tests/data/peps/multiple_subsamples/subsample_table1.csv index 1d4f955..f1b3c2f 100644 --- a/tests/data/peps/multiple_subsamples/subsample_table1.csv +++ b/tests/data/peps/multiple_subsamples/subsample_table1.csv @@ -1,4 +1,4 @@ -sample_name,file_path,subsample_name +sample_id,file_path,subsample_name frog_1,file/a.txt,a frog_1,file/b.txt,b frog_1,file/c.txt,c diff --git a/tests/data/peps/multiple_subsamples/subsample_table2.csv b/tests/data/peps/multiple_subsamples/subsample_table2.csv index 2e7f72b..5e6d298 100644 --- a/tests/data/peps/multiple_subsamples/subsample_table2.csv +++ b/tests/data/peps/multiple_subsamples/subsample_table2.csv @@ -1,4 +1,4 @@ -sample_name,random_string,subsample_name +sample_id,random_string,subsample_name frog_1,x_x,x frog_1,y_y,y frog_1,z_z,z diff --git a/tests/test_conversions.py b/tests/test_conversions.py index ba6bffe..a4aad8c 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -82,11 +82,25 @@ def test_eido_csv_filter_filters_nextflow_taxprofiler_input_correctly( assert conv_result == {"samples": output_pep_nextflow_taxprofiler} def test_multiple_subsamples(self, test_multiple_subs): - project = peppy.Project(test_multiple_subs) + project = peppy.Project(test_multiple_subs, sample_table_index="sample_id") - # ff = convert_project(project, "yaml", ) - ff = convert_project( + conversion = convert_project( project, "csv", ) - ff + assert isinstance(conversion["samples"], str) + conversion = convert_project( + project, + "basic", + ) + assert isinstance(conversion["project"], str) + conversion = convert_project( + project, + "yaml", + ) + assert isinstance(conversion["project"], str) + conversion = convert_project( + project, + "yaml-samples", + ) + assert isinstance(conversion["samples"], str)