From 09c7df27385b9b7a90cde05c508bfbd722706703 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Mon, 30 Sep 2024 11:55:03 -0400
Subject: [PATCH 1/3] Fix sample name lookup for value validation errors (fixes #76)

---
 eido/_version.py                                 |  2 +-
 eido/validation.py                               |  4 ++++
 tests/conftest.py                                | 16 +++++++++++++---
 .../project_config.yaml                          |  0
 .../sample_table.csv                             |  0
 .../subsample_table.csv                          |  0
 .../peps/value_check_pep/project_config.yaml     |  6 ++++++
 tests/data/peps/value_check_pep/sample_table.csv |  7 +++++++
 tests/data/schemas/value_check_schema.yaml       | 16 ++++++++++++++++
 tests/test_validations.py                        | 13 +++++++++----
 10 files changed, 56 insertions(+), 8 deletions(-)
 rename tests/data/peps/{test_file_existence => test_file_existing}/project_config.yaml (100%)
 rename tests/data/peps/{test_file_existence => test_file_existing}/sample_table.csv (100%)
 rename tests/data/peps/{test_file_existence => test_file_existing}/subsample_table.csv (100%)
 create mode 100644 tests/data/peps/value_check_pep/project_config.yaml
 create mode 100644 tests/data/peps/value_check_pep/sample_table.csv
 create mode 100644 tests/data/schemas/value_check_schema.yaml

diff --git a/eido/_version.py b/eido/_version.py
index d31c31e..788da1f 100644
--- a/eido/_version.py
+++ b/eido/_version.py
@@ -1 +1 @@
-__version__ = "0.2.3"
+__version__ = "0.2.4"
diff --git a/eido/validation.py b/eido/validation.py
index e409a13..e75d7e6 100644
--- a/eido/validation.py
+++ b/eido/validation.py
@@ -43,6 +43,10 @@ def _validate_object(obj: Mapping, schema: Union[str, dict], sample_name_colname
                 instance_name = error.instance[sample_name_colname]
             except KeyError:
                 instance_name = "project"
+            except TypeError:
+                instance_name = obj["samples"][error.absolute_path[1]][
+                    sample_name_colname
+                ]
             errors_by_type[error.message].append(
                 {
                     "type": error.message,
diff --git a/tests/conftest.py b/tests/conftest.py
index 905591e..eb931d9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -124,10 +124,20 @@ def save_result_mock(mocker):
 
 
 @pytest.fixture
-def test_file_existence_schema(schemas_path):
+def test_file_existing_schema(schemas_path):
     return os.path.join(schemas_path, "schema_test_file_exist.yaml")
 
 
 @pytest.fixture
-def test_file_existance_pep(peps_path):
-    return os.path.join(peps_path, "test_file_existence", "project_config.yaml")
+def test_file_existing_pep(peps_path):
+    return os.path.join(peps_path, "test_file_existing", "project_config.yaml")
+
+
+@pytest.fixture
+def test_schema_value_check(schemas_path):
+    return os.path.join(schemas_path, "value_check_schema.yaml")
+
+
+@pytest.fixture
+def test_file_value_check(peps_path):
+    return os.path.join(peps_path, "value_check_pep", "project_config.yaml")
diff --git a/tests/data/peps/test_file_existence/project_config.yaml b/tests/data/peps/test_file_existing/project_config.yaml
similarity index 100%
rename from tests/data/peps/test_file_existence/project_config.yaml
rename to tests/data/peps/test_file_existing/project_config.yaml
diff --git a/tests/data/peps/test_file_existence/sample_table.csv b/tests/data/peps/test_file_existing/sample_table.csv
similarity index 100%
rename from tests/data/peps/test_file_existence/sample_table.csv
rename to tests/data/peps/test_file_existing/sample_table.csv
diff --git a/tests/data/peps/test_file_existence/subsample_table.csv b/tests/data/peps/test_file_existing/subsample_table.csv
similarity index 100%
rename from tests/data/peps/test_file_existence/subsample_table.csv
rename to tests/data/peps/test_file_existing/subsample_table.csv
diff --git a/tests/data/peps/value_check_pep/project_config.yaml b/tests/data/peps/value_check_pep/project_config.yaml
new file mode 100644
index 0000000..66c4380
--- /dev/null
+++ b/tests/data/peps/value_check_pep/project_config.yaml
@@ -0,0 +1,6 @@
+description: None
+name: encode_prj
+pep_version: 2.0.0
+project_name: value_check_pep
+sample_table: sample_table.csv
+subsample_table: []
diff --git a/tests/data/peps/value_check_pep/sample_table.csv b/tests/data/peps/value_check_pep/sample_table.csv
new file mode 100644
index 0000000..cefc2aa
--- /dev/null
+++ b/tests/data/peps/value_check_pep/sample_table.csv
@@ -0,0 +1,7 @@
+sample_name,file_name,genome,assay,cell_line,target,format_type
+encode_4,ENCFF452DAM.bed.gz,hg38,Histone ChIP-seq,skeletal muscle myoblast,H3K36me3,narrowPeak
+encode_20,ENCFF121AXG.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak
+encode_21,ENCFF710ECJ.bed.gz,hg38,DNase-seq,RPMI7951,,broadPeak
+encode_22,ENCFF945FZN.bed.gz,hg38,DNase-seq,RPMI7951,,narrowPeak
+encode_23,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak
+encode_24,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak1
diff --git a/tests/data/schemas/value_check_schema.yaml b/tests/data/schemas/value_check_schema.yaml
new file mode 100644
index 0000000..fb2352d
--- /dev/null
+++ b/tests/data/schemas/value_check_schema.yaml
@@ -0,0 +1,16 @@
+description: bedboss run-all pep schema
+properties:
+  samples:
+    items:
+      properties:
+        format_type:
+          description: whether the regions are narrow (transcription factor implies
+            narrow, histone mark implies broad peaks)
+          enum:
+          - narrowPeak
+          - broadPeak
+          type: string
+      type: object
+    type: array
+required:
+- samples
diff --git a/tests/test_validations.py b/tests/test_validations.py
index a217ca4..6bea0a4 100644
--- a/tests/test_validations.py
+++ b/tests/test_validations.py
@@ -1,7 +1,6 @@
 import urllib
 
 import pytest
-from jsonschema.exceptions import ValidationError
 from peppy import Project
 from peppy.utils import load_yaml
 
@@ -140,9 +139,15 @@ def test_validate_detects_invalid(self, schema_invalid_file_path, remote_pep_cfg
             )
 
     def test_validate_file_existance(
-        self, test_file_existance_pep, test_file_existence_schema
+        self, test_file_existing_pep, test_file_existing_schema
     ):
-        schema_path = test_file_existence_schema
-        prj = Project(test_file_existance_pep)
+        schema_path = test_file_existing_schema
+        prj = Project(test_file_existing_pep)
         with pytest.raises(PathAttrNotFoundError):
             validate_input_files(prj, schema_path)
+
+    def test_validation_values(self, test_schema_value_check, test_file_value_check):
+        schema_path = test_schema_value_check
+        prj = Project(test_file_value_check)
+        with pytest.raises(EidoValidationError):
+            validate_project(project=prj, schema=schema_path)

From 5fd4867945e194b11e2aa6ab11ad2dc630be13af Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Mon, 30 Sep 2024 12:28:06 -0400
Subject: [PATCH 2/3] Fix PEP conversion with multiple subsample tables (fixes #70)

---
 eido/conversion_plugins.py                     |  3 +--
 eido/output_formatters.py                      |  2 +-
 tests/conftest.py                              |  5 +++++
 .../multiple_subsamples/project_config.yaml    | 14 ++++++++++++++
 .../peps/multiple_subsamples/sample_table.csv  |  5 +++++
 .../multiple_subsamples/subsample_table1.csv   |  6 ++++++
 .../multiple_subsamples/subsample_table2.csv   |  6 ++++++
 tests/test_conversions.py                      | 18 +++++++++++++++++-
 8 files changed, 55 insertions(+), 4 deletions(-)
 create mode 100644 tests/data/peps/multiple_subsamples/project_config.yaml
 create mode 100644 tests/data/peps/multiple_subsamples/sample_table.csv
 create mode 100644 tests/data/peps/multiple_subsamples/subsample_table1.csv
 create mode 100644 tests/data/peps/multiple_subsamples/subsample_table2.csv

diff --git a/eido/conversion_plugins.py b/eido/conversion_plugins.py
index 01dd550..0f62395 100644
--- a/eido/conversion_plugins.py
+++ b/eido/conversion_plugins.py
@@ -42,8 +42,7 @@ def yaml_pep_filter(p, **kwargs) -> Dict[str, str]:
     """
     from yaml import dump
 
-    data = p.config.to_dict()
-    return {"project": dump(data, default_flow_style=False)}
+    return {"project": dump(p.config, default_flow_style=False)}
 
 
 def csv_pep_filter(p, **kwargs) -> Dict[str, str]:
diff --git a/eido/output_formatters.py b/eido/output_formatters.py
index 8d20a12..f9d2486 100644
--- a/eido/output_formatters.py
+++ b/eido/output_formatters.py
@@ -110,7 +110,7 @@ def _convert_sample_to_row(
             ):
                 value = sample[attribute][sample_index]
             else:
-                value = sample[attribute]
+                value = sample.get(attribute)
 
             sample_row.append(value or "")
 
diff --git a/tests/conftest.py b/tests/conftest.py
index eb931d9..8e47b94 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -141,3 +141,8 @@ def test_schema_value_check(schemas_path):
 @pytest.fixture
 def test_file_value_check(peps_path):
     return os.path.join(peps_path, "value_check_pep", "project_config.yaml")
+
+
+@pytest.fixture
+def test_multiple_subs(peps_path):
+    return os.path.join(peps_path, "multiple_subsamples", "project_config.yaml")
diff --git a/tests/data/peps/multiple_subsamples/project_config.yaml b/tests/data/peps/multiple_subsamples/project_config.yaml
new file mode 100644
index 0000000..1063907
--- /dev/null
+++ b/tests/data/peps/multiple_subsamples/project_config.yaml
@@ -0,0 +1,14 @@
+pep_version: "2.1.0"
+sample_table: sample_table.csv
+subsample_table:
+  - subsample_table1.csv
+  - subsample_table2.csv
+
+
+sample_modifiers:
+  append:
+    local_files: LOCAL
+  derive:
+    attributes: [local_files]
+    sources:
+      LOCAL: "../data/{file_path}"
diff --git a/tests/data/peps/multiple_subsamples/sample_table.csv b/tests/data/peps/multiple_subsamples/sample_table.csv
new file mode 100644
index 0000000..1137443
--- /dev/null
+++ b/tests/data/peps/multiple_subsamples/sample_table.csv
@@ -0,0 +1,5 @@
+sample_name,protocol,identifier
+frog_1,anySampleType,frog1
+frog_2,anySampleType,frog2
+frog_3,anySampleType,frog3
+frog_4,anySampleType,frog4
diff --git a/tests/data/peps/multiple_subsamples/subsample_table1.csv b/tests/data/peps/multiple_subsamples/subsample_table1.csv
new file mode 100644
index 0000000..1d4f955
--- /dev/null
+++ b/tests/data/peps/multiple_subsamples/subsample_table1.csv
@@ -0,0 +1,6 @@
+sample_name,file_path,subsample_name
+frog_1,file/a.txt,a
+frog_1,file/b.txt,b
+frog_1,file/c.txt,c
+frog_2,file/a.txt,a
+frog_2,file/b.txt,b
diff --git a/tests/data/peps/multiple_subsamples/subsample_table2.csv b/tests/data/peps/multiple_subsamples/subsample_table2.csv
new file mode 100644
index 0000000..2e7f72b
--- /dev/null
+++ b/tests/data/peps/multiple_subsamples/subsample_table2.csv
@@ -0,0 +1,6 @@
+sample_name,random_string,subsample_name
+frog_1,x_x,x
+frog_1,y_y,y
+frog_1,z_z,z
+frog_2,xy_yx,xy
+frog_2,xx_xx,xx
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 179020e..ba6bffe 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -1,4 +1,10 @@
-from eido.conversion import *
+from eido.conversion import (
+    run_filter,
+    get_available_pep_filters,
+    pep_conversion_plugins,
+    convert_project,
+)
+import peppy
 
 
 class TestConversionInfrastructure:
@@ -74,3 +80,13 @@ def test_eido_csv_filter_filters_nextflow_taxprofiler_input_correctly(
 
         assert save_result_mock.called
         assert conv_result == {"samples": output_pep_nextflow_taxprofiler}
+
+    def test_multiple_subsamples(self, test_multiple_subs):
+        project = peppy.Project(test_multiple_subs)
+
+        # ff = convert_project(project, "yaml", )
+        ff = convert_project(
+            project,
+            "csv",
+        )
+        ff

From 0f8f59dafbb58c03f0cded983544a67ba75b643c Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Mon, 30 Sep 2024 15:01:38 -0400
Subject: [PATCH 3/3] Add more conversion tests and require peppy>=0.40.7

---
 eido/conversion_plugins.py                    |  2 +-
 requirements/requirements-all.txt             |  2 +-
 .../multiple_subsamples/project_config.yaml   |  7 +++++-
 .../peps/multiple_subsamples/sample_table.csv |  2 +-
 .../multiple_subsamples/subsample_table1.csv  |  2 +-
 .../multiple_subsamples/subsample_table2.csv  |  2 +-
 tests/test_conversions.py                     | 22 +++++++++++++++----
 7 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/eido/conversion_plugins.py b/eido/conversion_plugins.py
index 0f62395..fe45805 100644
--- a/eido/conversion_plugins.py
+++ b/eido/conversion_plugins.py
@@ -69,7 +69,7 @@ def processed_pep_filter(p, **kwargs) -> Dict[str, str]:
     samples_as_objects = kwargs.get("samples_as_objects")
     subsamples_as_objects = kwargs.get("subsamples_as_objects")
 
-    prj_repr = p.config.to_dict()
+    prj_repr = p.config
 
     return {
         "project": str(prj_repr),
diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt
index 5e81881..7c16b30 100644
--- a/requirements/requirements-all.txt
+++ b/requirements/requirements-all.txt
@@ -2,5 +2,5 @@ importlib-metadata; python_version < '3.10'
 jsonschema>=3.0.1
 logmuse>=0.2.5
 pandas
-peppy>=0.40.6
+peppy>=0.40.7
 ubiquerg>=0.5.2
diff --git a/tests/data/peps/multiple_subsamples/project_config.yaml b/tests/data/peps/multiple_subsamples/project_config.yaml
index 1063907..e0e580b 100644
--- a/tests/data/peps/multiple_subsamples/project_config.yaml
+++ b/tests/data/peps/multiple_subsamples/project_config.yaml
@@ -4,11 +4,16 @@ subsample_table:
   - subsample_table1.csv
   - subsample_table2.csv
 
-
 sample_modifiers:
   append:
     local_files: LOCAL
+    genome: "fg"
   derive:
     attributes: [local_files]
     sources:
       LOCAL: "../data/{file_path}"
+  imply:
+    - if:
+        identifier: "frog1"
+      then:
+        genome: "frog_frog"
diff --git a/tests/data/peps/multiple_subsamples/sample_table.csv b/tests/data/peps/multiple_subsamples/sample_table.csv
index 1137443..7c06204 100644
--- a/tests/data/peps/multiple_subsamples/sample_table.csv
+++ b/tests/data/peps/multiple_subsamples/sample_table.csv
@@ -1,4 +1,4 @@
-sample_name,protocol,identifier
+sample_id,protocol,identifier
 frog_1,anySampleType,frog1
 frog_2,anySampleType,frog2
 frog_3,anySampleType,frog3
diff --git a/tests/data/peps/multiple_subsamples/subsample_table1.csv b/tests/data/peps/multiple_subsamples/subsample_table1.csv
index 1d4f955..f1b3c2f 100644
--- a/tests/data/peps/multiple_subsamples/subsample_table1.csv
+++ b/tests/data/peps/multiple_subsamples/subsample_table1.csv
@@ -1,4 +1,4 @@
-sample_name,file_path,subsample_name
+sample_id,file_path,subsample_name
 frog_1,file/a.txt,a
 frog_1,file/b.txt,b
 frog_1,file/c.txt,c
diff --git a/tests/data/peps/multiple_subsamples/subsample_table2.csv b/tests/data/peps/multiple_subsamples/subsample_table2.csv
index 2e7f72b..5e6d298 100644
--- a/tests/data/peps/multiple_subsamples/subsample_table2.csv
+++ b/tests/data/peps/multiple_subsamples/subsample_table2.csv
@@ -1,4 +1,4 @@
-sample_name,random_string,subsample_name
+sample_id,random_string,subsample_name
 frog_1,x_x,x
 frog_1,y_y,y
 frog_1,z_z,z
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index ba6bffe..a4aad8c 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -82,11 +82,25 @@ def test_eido_csv_filter_filters_nextflow_taxprofiler_input_correctly(
         assert conv_result == {"samples": output_pep_nextflow_taxprofiler}
 
     def test_multiple_subsamples(self, test_multiple_subs):
-        project = peppy.Project(test_multiple_subs)
+        project = peppy.Project(test_multiple_subs, sample_table_index="sample_id")
 
-        # ff = convert_project(project, "yaml", )
-        ff = convert_project(
+        conversion = convert_project(
             project,
             "csv",
         )
-        ff
+        assert isinstance(conversion["samples"], str)
+        conversion = convert_project(
+            project,
+            "basic",
+        )
+        assert isinstance(conversion["project"], str)
+        conversion = convert_project(
+            project,
+            "yaml",
+        )
+        assert isinstance(conversion["project"], str)
+        conversion = convert_project(
+            project,
+            "yaml-samples",
+        )
+        assert isinstance(conversion["samples"], str)