calpolyccg
diff --git a/‎.github/workflows/CI.yaml
Lines changed: 29 additions & 8 deletions b/‎.github/workflows/CI.yaml
Lines changed: 29 additions & 8 deletions
diff --git a/‎environment.yml
Lines changed: 11 additions & 1 deletion b/‎environment.yml
Lines changed: 11 additions & 1 deletion
diff --git a/‎mdsapt/__init__.py
Lines changed: 1 addition & 1 deletion b/‎mdsapt/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎mdsapt/config.py
Lines changed: 172 additions & 0 deletions b/‎mdsapt/config.py
Lines changed: 172 additions & 0 deletions
diff --git a/‎mdsapt/data/template_input.yaml
Lines changed: 55 additions & 20 deletions b/‎mdsapt/data/template_input.yaml
Lines changed: 55 additions & 20 deletions
@@ -50,20 +50,15 @@ jobs:
         auto-update-conda: false
         auto-activate-base: false
         use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
-
-    - name: Install MDSAPT package
-      # conda setup requires this special shell
+    
+    - name: Type-check
       shell: bash -l {0}
       run: |
-        python -m pip install . --no-deps
-        conda list
+        pyright mdsapt
 
-    # TODO: possibly integrate this step with meta.yml?
     - name: Run tests
-      # conda setup requires this special shell
       shell: bash -l {0}
       run: |
-        pip install pytest-cov
         pytest -v ./mdsapt --cov=mdsapt --cov-report=xml
 
     - name: Upload CodeCov
@@ -73,6 +68,12 @@ jobs:
         flags: unittests
         name: codecov-${{ matrix.os }}-py${{ matrix.python-version }}
 
+    - name: Install MDSAPT package
+      shell: bash -l {0}
+      run: |
+        python -m pip install . --no-deps
+        conda list
+
   package:
     name: Packaging py${{ matrix.python-version }}/${{ matrix.os }}
     runs-on: ${{ matrix.os }}
@@ -162,3 +163,23 @@ jobs:
       shell: bash -l {0}
       run: >
         python3 -c "import mdsapt"
+
+  lint-and-format:
+    # These checks are much faster: they only need the linter and formatter, not the package or its conda environment.
+    name: Run fast checks over code
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Install formatter and linter
+      run: |
+        pip install autopep8 pylint
+
+    - name: Lint code
+      run: |
+        pylint mdsapt
+
+    - name: Ensure formatting
+      run: |
+        autopep8 -r mdsapt --diff --exit-code
@@ -7,13 +7,23 @@ channels:
   - defaults
 dependencies:
   - mdanalysis
-  - nglview
   - numpy
   - openmm
   - pandas
   - pdbfixer
   - psi4
   - pytest
+  - pydantic
   - pytest-cov
   - pyyaml
   - rdkit
+
+  # Optional deps
+  - nglview
+
+  # Development deps
+  - autopep8
+  - pyright # type-checking
+  - pylint
+  - pytest
+  - pytest-cov
@@ -4,7 +4,7 @@
 from . import log
 
 # Import core classes
-from .reader import InputReader
+from .config import Config
 from .optimizer import Optimizer
 from .sapt import TrajectorySAPT
 
 
@@ -0,0 +1,172 @@
+r"""
+:mod:`mdsapt.config` -- Reads input file and saves configuration
+================================================================
+
+MDSAPT uses a yaml file to get the user's configuration for SAPT calculations.
+:class:`mdsapt.config.Config` holds the validated settings, and
+:func:`mdsapt.config.load_from_yaml_file` is responsible for reading the yaml
+file and returning the information from it. If a yaml file is needed it can be
+generated using the included *mdsapt_get_runinput* script.
+
+.. autoclass:: Config
+    :members:
+    :inherited-members:
+
+.. autofunction:: load_from_yaml_file
+"""
+
+from enum import Enum
+from pathlib import Path
+from typing import List, Dict, Tuple, Literal, Optional, \
+    Union, Any, Set
+
+import yaml
+
+import MDAnalysis as mda
+
+import logging
+
+from pydantic import BaseModel, conint, Field, root_validator, \
+    FilePath, ValidationError, DirectoryPath
+
+logger = logging.getLogger('mdsapt.config')
+
+
+class Psi4Config(BaseModel):
+    """Psi4 configuration details.
+
+    Attributes
+    ----------
+    method
+        The SAPT method to use.
+
+        NOTE: You can use any valid Psi4 method, but it might fail if you
+        don't use a SAPT method.
+    basis
+        The basis to use in Psi4.
+
+        NOTE: We do not verify if this is a valid basis set or not.
+    save_output
+        Whether to save the raw output of Psi4. May be useful for debugging.
+    settings
+        Other Psi4 settings you would like to provide.
+    """
+
+    method: str
+    basis: str
+    save_output: bool
+    settings: Dict[str, str]
+
+
+class SysLimitsConfig(BaseModel):
+    """Resource limits for your system."""
+    # CPU count limit; validated to be an integer >= 1.
+    ncpus: conint(ge=1)
+    # Memory limit as a free-form string; its format is not validated here.
+    memory: str
+
+
+class ChargeGuesser(Enum):
+    """Allowed values of the ``charge_guesser`` setting.
+
+    The enum values are the strings accepted in the input file.
+    """
+    Standard = 'standard'
+    RDKit = 'rdkit'
+
+
+class SimulationConfig(BaseModel):
+    """The ``simulation`` section of the input file."""
+    # pH value given as a float (the template input uses 7.0).
+    ph: float
+    # Which charge guesser to use; must be one of the ChargeGuesser values.
+    charge_guesser: ChargeGuesser
+
+
+class DetailedTopologySelection(BaseModel):
+    """Long form of a topology selection: a file path plus charge overrides."""
+    # Path to the topology file (pydantic ``FilePath``).
+    path: FilePath
+    # Integer-to-integer mapping of charge overrides; empty when omitted.
+    # NOTE(review): keys are presumably residue ids and values charges -- confirm.
+    charge_overrides: Dict[int, int] = Field(default_factory=dict)
+
+
+TopologySelection = Union[FilePath, DetailedTopologySelection]
+
+
+def topology_selection_path(sel: TopologySelection) -> FilePath:
+    if isinstance(sel, DetailedTopologySelection):
+        return sel.path
+    return sel
+
+
+class RangeFrameSelection(BaseModel):
+    start: Optional[conint(ge=0)]
+    stop: Optional[conint(ge=0)]
+    step: Optional[conint(ge=1)]
+
+    @root_validator()
+    def check_start_before_stop(cls, values: Dict[str, int]) -> Dict[str, int]:
+        assert values['start'] <= values['stop'], "start must be before stop"
+        return values
+
+
+class TrajectoryAnalysisConfig(BaseModel):
+    """
+    A selection of the frames used in this analysis.
+
+    Serialization behavior
+    ----------------------
+    If this value is a range, it will be serialized using start/stop/step.
+    Otherwise, it will be serialized into a List[int].
+    """
+    type: Literal['trajectory']
+    topology: TopologySelection
+    trajectories: List[FilePath]
+    pairs: List[Tuple[conint(ge=0), conint(ge=0)]]
+    frames: Union[List[int], RangeFrameSelection]
+    output: str
+
+    @root_validator()
+    def check_valid_md_system(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        errors: List[str] = []
+
+        top_path: TopologySelection = values['topology']
+        trj_path: List[FilePath] = values['trajectories']
+        ag_pair: List[Tuple[conint(ge=0), conint(ge=0)]] = values['pairs']
+        frames: Union[List[int], RangeFrameSelection] = values['frames']
+
+        try:
+            unv = mda.Universe(str(topology_selection_path(top_path)),
+                               [str(p) for p in trj_path])
+        except (mda.exceptions.NoDataError, OSError, ValueError):
+            errors.append('Error while creating universe using provided topology and trajectories')
+            raise ValidationError(errors)  # If Universe doesn't load need to stop
+
+        # Ensure that
+        items: Set[int] = {i for pair in ag_pair for i in pair}
+
+        for sel in items:
+            ag: mda.AtomGroup = unv.select_atoms(f'resid {sel}')
+            if len(ag) == 0:
+                errors.append(f"Selection {sel} returns an empty AtomGroup.")
+
+
+
+        trajlen: int = len(unv.trajectory)
+        if isinstance(frames, RangeFrameSelection):
+            if trajlen <= frames.stop:
+                errors.append(f'Stop {frames.stop} exceeds trajectory length {trajlen}.')
+        else:
+            frames: List[int]
+            for frame in frames:
+                if frame >= trajlen:
+                    errors.append(f'Frame {frame} exceeds trajectory length {trajlen}')
+
+        if len(errors) > 0:
+            raise ValidationError(errors)
+        return values
+
+
+class DockingAnalysisConfig(BaseModel):
+    """Settings for a docking analysis (``type: 'docking'`` in the input file).
+
+    NOTE(review): the template input also lists an ``output`` key for docking
+    runs, but no such field is declared here -- confirm whether it is needed.
+    """
+    # Discriminator value that selects this model inside ``Config.analysis``.
+    type: Literal['docking']
+    # Either an explicit list of topology selections or a single directory path.
+    topologies: Union[List[TopologySelection], DirectoryPath]
+    # Pairs of integers; unlike the trajectory config, no lower bound is enforced.
+    pairs: List[Tuple[int, int]]
+
+
+class Config(BaseModel):
+    """Top-level model of the input file, with one field per top-level section."""
+    psi4: Psi4Config
+    simulation: SimulationConfig
+    system_limits: SysLimitsConfig
+    # The ``type`` key of this section decides which analysis model validates it.
+    analysis: Union[TrajectoryAnalysisConfig, DockingAnalysisConfig] = Field(..., discriminator='type')
+
+
+def load_from_yaml_file(path: Union[str, Path]) -> Config:
+    if isinstance(path, str):
+        path = Path(path)
+
+    with path.open() as f:
+        try:
+            return Config(**yaml.safe_load(f))
+        except ValidationError as err:
+            logger.exception(f"Error while loading {path}")
+            raise err
@@ -1,24 +1,59 @@
-topology_path:
-trajectory_paths:
-  -
-selection_resid_num:
-  -
-int_pairs:
-  # Place pair names defined above in list in a list
-  - []
-trajectory_settings:
-  start:
-  stop:
-  step:
-system_settings:
-  ncpus:
-  memory:
-  time:
-opt_settings:
-  pH: 7.0
-sapt_settings:
+psi4:
   method: 'sapt'
   basis: 'jun-cc-pvdz'
+  save_output: true
   settings:
     reference: 'rhf'
-  save_psi4_output: true
+simulation:
+  ph: 7.0
+  charge_guesser: 'standard'
+  # charge_guesser: 'rdkit'  # to use rdkit. Make sure it is installed first.
+system_limits:
+  ncpus:
+  memory:
+analysis:
+  ### This section is for running TrajectorySAPT. To run other types of analyses, see below.
+  type: 'trajectory'
+
+  topology: 'your/file/path.pdb'
+  # If you want to override the charges of specific residues, you may specify them this way.
+  # topology:
+  #    path: 'your/file/path.pdb'
+  #    charge_overrides:
+  #      132: -2
+
+  trajectories:
+    - 'your/trajectory/path.dcd'
+  pairs:
+    - [132, 152]
+    - [34, 152]
+  frames:
+    # To select a set of specific frames:
+    - 1
+    - 4
+    - 8
+
+    # # Alternatively, to select a range of frames: (these are mutually exclusive)
+    # start:
+    # stop:
+    # step:
+
+  output: 'output.csv'
+
+#  ### For running DockingSAPT
+#  type: 'docking'
+#
+#  topologies:
+#    - 'one/topology.pdb'
+#    - 'another/topology.pdb'
+#    - 'a/glob/of/topologies/*.pdb'
+#    - 'a/directory/full/of/topologies'
+#    - path: 'one/specific/topology/with/weird/charges.pdb'
+#      charge_overrides:
+#        145: -4
+#
+#  pairs:
+#    - [132, 152]
+#    - [34, 152]
+#
+#  output: 'output.csv'