Skip to content

Commit

Permalink
Merge pull request #64 from lcmd-epfl/new-metatensor
Browse files Browse the repository at this point in the history
* New version of metatensor
* Fix the discrepancies between pip's files, requirements.txt and environment.yml
* Update the installation section of README
* Use qstack_qml as qstack.qml
  • Loading branch information
briling authored Jun 20, 2024
2 parents d955103 + 3ad83dc commit c6e487e
Show file tree
Hide file tree
Showing 12 changed files with 134 additions and 65 deletions.
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,22 @@ For now, we link to the relevant packages that will be incorporated (among other

## Install [](#contents)

The installation of the library for python use can be done executing the following commands:
The library can be installed for Python use by executing one of the following commands:

```
python -m pip install git+https://github.com/lcmd-epfl/Q-stack.git
python -m pip install "qstack @ git+https://github.com/lcmd-epfl/Q-stack.git"
python -m pip install "qstack[all] @ git+https://github.com/lcmd-epfl/Q-stack.git"
python -m pip install -r requirements.py3.11.txt
```

The last two are recommended if you do not know which features you wish to use, since they also pull in the dependencies required by all the optional parts of Q-stack.

If you wish to use a conda environment, an `environment.yml` file is also available; it is the conda analogue of the last install command.

A small part of Q-stack, packaged separately under the module name `qstack_qml`, can be installed on its own; see [the qstack-qml subdirectory's readme](../master/qstack/qstack-qml/README.md).


## Examples [](#contents)
Q-stack comes with several example codes that illustrate some of its key capabilities. To run the examples, go to the example folder and run the following commands:

Expand Down
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,6 @@ dependencies:
- toml==0.10.2
- ase==3.22
- tqdm==4.66
- git+https://github.com/lab-cosmo/equistore.git@e5b9dc365369ba2584ea01e9d6a4d648008aaab8#subdirectory=python/equistore-core
- metatensor-core==0.1.8
- git+https://github.com/lcmd-epfl/cell2mol.git@22473bbf12a013467137a55a63c88fbbdc95baa2
- qstack/qstack-qml
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dependencies = [
regression = ["scikit-learn >= 0.24.2, < 1.6"]
wigner = ["sympy >= 1.5, < 1.13"]
gmol = ["cell2mol @ git+https://github.com/lcmd-epfl/cell2mol.git@22473bbf12a013467137a55a63c88fbbdc95baa2"] # branch: dev, date: 2024-06-06
equio = ["equistore-core @ git+https://github.com/lab-cosmo/equistore.git@e5b9dc365369ba2584ea01e9d6a4d648008aaab8#subdirectory=python/equistore-core"]
equio = ["metatensor-core == 0.1.8"]
all = ["qstack[qml,regression,wigner,equio,gmol]"]

[project.urls]
Expand Down
5 changes: 5 additions & 0 deletions qstack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@
from qstack import spahm
from qstack import mathutils
from qstack import orcaio
from qstack import qml
if 'b2r2' not in dir(qml):
del qml


# qstack.regression needs sklearn to work
try:
import sklearn
except ImportError:
Expand Down
107 changes: 63 additions & 44 deletions qstack/equio.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from functools import reduce
import numpy as np
from types import SimpleNamespace
from pyscf import data
import equistore.core as equistore
import metatensor
import numbers

vector_label_names = SimpleNamespace(
Expand Down Expand Up @@ -58,13 +59,25 @@ def _get_tsize(tensor):
"""Computes the size of a tensor.
Args:
tensor (equistore TensorMap): Tensor.
tensor (metatensor TensorMap): Tensor.
Returns:
The size of the tensor as an integer.
"""
return sum([np.prod(tensor.block(key).values.shape) for key in tensor.keys])

def _labels_to_array(labels):
"""Represents a set of metatensor labels as an array of the labels, using custom dtypes
Args:
labels (metatensor Labels): Labels
Returns:
labels (numpy ndarray[ndim=1, structured dtype]): the same labels
"""
values = labels.values
dtype = [ (name,values.dtype) for name in labels.names]
return values.view(dtype=dtype).reshape(values.shape[0])

def vector_to_tensormap(mol, c):
"""Transform a vector into a tensor map. Used by :py:func:`array_to_tensormap`.
Expand All @@ -74,7 +87,7 @@ def vector_to_tensormap(mol, c):
v (numpy ndarray): Vector.
Returns:
A equistore tensor map.
A metatensor tensor map.
"""

atom_charges = list(mol.atom_charges())
Expand Down Expand Up @@ -104,11 +117,11 @@ def vector_to_tensormap(mol, c):
block_prop_label_vals[label] = np.arange(properties_count).reshape(-1,1)
block_samp_label_vals[label] = np.where(atom_charges==q)[0].reshape(-1,1)

tm_labels = equistore.Labels(vector_label_names.tm, np.array(tm_label_vals))
tm_labels = metatensor.Labels(vector_label_names.tm, np.array(tm_label_vals))

block_comp_labels = {key: equistore.Labels(vector_label_names.block_comp, block_comp_label_vals[key]) for key in blocks}
block_prop_labels = {key: equistore.Labels(vector_label_names.block_prop, block_prop_label_vals[key]) for key in blocks}
block_samp_labels = {key: equistore.Labels(vector_label_names.block_samp, block_samp_label_vals[key]) for key in blocks}
block_comp_labels = {key: metatensor.Labels(vector_label_names.block_comp, block_comp_label_vals[key]) for key in blocks}
block_prop_labels = {key: metatensor.Labels(vector_label_names.block_prop, block_prop_label_vals[key]) for key in blocks}
block_samp_labels = {key: metatensor.Labels(vector_label_names.block_samp, block_samp_label_vals[key]) for key in blocks}

# Fill in the blocks

Expand Down Expand Up @@ -138,8 +151,8 @@ def vector_to_tensormap(mol, c):

# Build tensor blocks and tensor map

tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=[block_comp_labels[key]], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = equistore.TensorMap(keys=tm_labels, blocks=tensor_blocks)
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=[block_comp_labels[key]], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = metatensor.TensorMap(keys=tm_labels, blocks=tensor_blocks)

return tensor

Expand All @@ -149,7 +162,7 @@ def tensormap_to_vector(mol, tensor):
Args:
mol (pyscf Mole): pyscf Mole object.
tensor (equistore TensorMap): Tensor.
tensor (metatensor TensorMap): Tensor.
Returns:
A numpy ndarray (vector).
Expand Down Expand Up @@ -185,7 +198,7 @@ def matrix_to_tensormap(mol, dm):
v (numpy ndarray): Matrix.
Returns:
A equistore tensor map.
A metatensor tensor map.
"""

def pairs(list1, list2):
Expand Down Expand Up @@ -226,14 +239,14 @@ def pairs(list1, list2):
block_prop_label_vals[label] = pairs(np.arange(properties_count1), np.arange(properties_count2))
block_samp_label_vals[label] = pairs(np.where(atom_charges==q1)[0],np.where(atom_charges==q2)[0])

tm_labels = equistore.Labels(matrix_label_names.tm, np.array(tm_label_vals))
tm_labels = metatensor.Labels(matrix_label_names.tm, np.array(tm_label_vals))

block_prop_labels = {key: equistore.Labels(matrix_label_names.block_prop, block_prop_label_vals[key]) for key in blocks}
block_samp_labels = {key: equistore.Labels(matrix_label_names.block_samp, block_samp_label_vals[key]) for key in blocks}
block_comp_labels = {key: [equistore.Labels([name], vals) for name, vals in zip(matrix_label_names.block_comp, block_comp_label_vals[key])] for key in blocks}
block_prop_labels = {key: metatensor.Labels(matrix_label_names.block_prop, block_prop_label_vals[key]) for key in blocks}
block_samp_labels = {key: metatensor.Labels(matrix_label_names.block_samp, block_samp_label_vals[key]) for key in blocks}
block_comp_labels = {key: [metatensor.Labels([name], vals) for name, vals in zip(matrix_label_names.block_comp, block_comp_label_vals[key])] for key in blocks}

# Build tensor blocks
tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]

# Fill in the blocks

Expand Down Expand Up @@ -293,8 +306,8 @@ def pairs(list1, list2):
blocks[key] = np.ascontiguousarray(blocks[key][:,:,_pyscf2gpr_l1_order,:])

# Build tensor map
tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = equistore.TensorMap(keys=tm_labels, blocks=tensor_blocks)
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = metatensor.TensorMap(keys=tm_labels, blocks=tensor_blocks)

return tensor

Expand All @@ -304,7 +317,7 @@ def tensormap_to_matrix(mol, tensor):
Args:
mol (pyscf Mole): pyscf Mole object.
tensor (equistore TensorMap): Tensor.
tensor (metatensor TensorMap): Tensor.
Returns:
A numpy ndarray (matrix).
Expand Down Expand Up @@ -352,7 +365,7 @@ def array_to_tensormap(mol, v):
v (numpy ndarray): Array. It can be a vector or a matrix.
Returns:
A equistore tensor map.
A metatensor tensor map.
"""
if v.ndim==1:
return vector_to_tensormap(mol, v)
Expand All @@ -367,15 +380,15 @@ def tensormap_to_array(mol, tensor):
Args:
mol (pyscf Mole): pyscf Mole object.
tensor (equistore TensorMap): Tensor.
tensor (metatensor TensorMap): Tensor.
Returns:
A numpy ndarray. Matrix or vector, depending on the key names of the tensor.
"""

if tensor.keys.names==tuple(vector_label_names.tm):
if tensor.keys.names==vector_label_names.tm:
return tensormap_to_vector(mol, tensor)
elif tensor.keys.names==tuple(matrix_label_names.tm):
elif tensor.keys.names==matrix_label_names.tm:
return tensormap_to_matrix(mol, tensor)
else:
raise Exception(f'Tensor key names mismatch. Cannot determine if it is a vector or a matrix')
Expand All @@ -385,16 +398,17 @@ def join(tensors):
"""Merge two or more tensors with the same label names avoiding information duplictaion.
Args:
tensors (list): List of equistore TensorMap.
tensors (list): List of metatensor TensorMap.
Returns:
A equistore TensorMap containing the information of all the input tensors.
A metatensor TensorMap containing the information of all the input tensors.
"""

if not all(tensor.keys.names==tensors[0].keys.names for tensor in tensors):
raise Exception(f'Cannot merge tensors with different label names')
tm_label_vals = sorted(list(set().union(*[set(tensor.keys.tolist()) for tensor in tensors])))
tm_labels = equistore.Labels(tensors[0].keys.names, np.array(tm_label_vals))
tm_label_vals = set().union(*[set(_labels_to_array(tensor.keys)) for tensor in tensors])
tm_label_vals = sorted((tuple(value) for value in tm_label_vals))
tm_labels = metatensor.Labels(tensors[0].keys.names, np.array(tm_label_vals))

blocks = {}
block_comp_labels = {}
Expand All @@ -403,7 +417,7 @@ def join(tensors):
block_samp_label_vals = {}

for label in tm_labels:
key = tuple(label.tolist())
key = tuple(label.values)
blocks[key] = []
block_samp_label_vals[key] = []
for imol,tensor in enumerate(tensors):
Expand All @@ -420,10 +434,10 @@ def join(tensors):
for key in blocks:
blocks[key] = np.concatenate(blocks[key])
block_samp_label_vals[key] = np.array(block_samp_label_vals[key])
block_samp_labels[key] = equistore.Labels((_molid_name, *tensor.sample_names), block_samp_label_vals[key])
block_samp_labels[key] = metatensor.Labels((_molid_name, *tensor.sample_names), block_samp_label_vals[key])

tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = equistore.TensorMap(keys=tm_labels, blocks=tensor_blocks)
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = metatensor.TensorMap(keys=tm_labels, blocks=tensor_blocks)

return tensor

Expand All @@ -432,17 +446,20 @@ def split(tensor):
"""Split a tensor based on the molecule information stored within the input TensorMap.
Args:
tensor (equistore TensorMap): Tensor containing several molecules.
tensor (metatensor TensorMap): Tensor containing several molecules.
Returns:
N equistore TensorMap, where N is equal to the total number of diferent molecules stored within the input TensorMap.
N metatensor TensorMap, where N is equal to the total number of diferent molecules stored within the input TensorMap.
"""

if tensor.sample_names[0]!=_molid_name:
raise Exception(f'Tensor does not seem to contain several molecules')

# Check if the molecule indices are continuous
mollist = sorted(set(np.hstack([np.array(tensor.block(keys).samples.tolist())[:,0] for keys in tensor.keys])))
mollist = sorted(reduce(
lambda a,b: a.union(b),
[set(block.samples.column(_molid_name)) for block in tensor.blocks()]
))
if mollist==list(range(len(mollist))):
tensors = [None] * len(mollist)
else:
Expand All @@ -451,8 +468,8 @@ def split(tensor):
# Common labels
block_comp_labels = {}
block_prop_labels = {}
for label in tensor.keys:
key = label.tolist()
for label,block in tensor.items():
key = tuple(label.values)
block = tensor.block(label)
block_comp_labels[key] = block.components
block_prop_labels[key] = block.properties
Expand All @@ -463,20 +480,22 @@ def split(tensor):
block_samp_labels = {}

for label in tensor.keys:
key = label.tolist()
key = tuple(label.values)
block = tensor.block(label)

samplelbl = [lbl for lbl in block.samples.tolist() if lbl[0]==imol]
if len(samplelbl)==0:
samples = [(sample_i,lbl) for sample_i,lbl in enumerate(block.samples.values) if lbl[0]==imol]
if len(samples)==0:
continue
sampleidx = [block.samples.position(lbl) for lbl in samplelbl]
sampleidx = [t[0] for t in samples]
samplelbl = [t[1] for t in samples]
#sampleidx = [block.samples.position(lbl) for lbl in samplelbl]

blocks[key] = block.values[sampleidx]
block_samp_labels[key] = equistore.Labels(tensor.sample_names[1:], np.array(samplelbl)[:,1:])
block_samp_labels[key] = metatensor.Labels(tensor.sample_names[1:], np.array(samplelbl)[:,1:])

tm_label_vals = sorted(list(blocks.keys()))
tm_labels = equistore.Labels(tensor.keys.names, np.array(tm_label_vals))
tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensors[imol] = equistore.TensorMap(keys=tm_labels, blocks=tensor_blocks)
tm_labels = metatensor.Labels(tensor.keys.names, np.array(tm_label_vals))
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensors[imol] = metatensor.TensorMap(keys=tm_labels, blocks=tensor_blocks)

return tensors
16 changes: 16 additions & 0 deletions qstack/qml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# qstack.qml is in a different python package
# but prefer "the local version of it" if we are in a development environment, and both sources are there.
import os
_qstack_qml_path = os.path.join(os.path.dirname(__file__), 'qstack-qml')
if os.path.isfile(os.path.join(_qstack_qml_path, 'qstack_qml', '__init__.py')):
import sys
sys.path.insert(0,_qstack_qml_path)
from qstack_qml import *
sys.path.pop(0)
del sys
else:
try:
from qstack_qml import *
except ImportError:
pass
del os, _qstack_qml_path
2 changes: 1 addition & 1 deletion qstack/regression/kernel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def cdist(X, Y):
x = np.array([x] * len(Y))
d = np.abs(x-Y)
while len(d.shape)>1:
d = np.sum(d, axis=1) # several axis available for np > 1.7.0
d = np.sum(d, axis=1) # several axis available for np > 1.7.0 (TODO shall we move this)
K[i,:] = d
return K
K = -gamma * cdist(X, Y)
Expand Down
19 changes: 19 additions & 0 deletions requirements.py3.11.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
attrs==21.4.0
certifi==2021.10.8
h5py==3.11.0
iniconfig==1.1.1
packaging==21.3
pluggy==1.0.0
py==1.11.0
pyparsing==3.0.6
pyscf==2.2.0
pytest==6.2.5
numpy===1.22.3
scipy==1.10
toml==0.10.2
scikit-learn==1.5.0
ase==3.22
tqdm==4.66
metatensor-core==0.1.8
cell2mol @ git+https://github.com/lcmd-epfl/cell2mol.git@22473bbf12a013467137a55a63c88fbbdc95baa2
qstack/qstack-qml
7 changes: 3 additions & 4 deletions requirements.txt → requirements.py3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@ iniconfig==1.1.1
packaging==21.3
pluggy==1.0.0
py==1.11.0
#cython==0.29.24
pyparsing==3.0.6
pyscf==2.0.1
pytest==6.2.5
numpy===1.22.3
scipy==1.10
toml==0.10.2
scikit-learn==0.24.2
#scikit-learn==1.0.2
ase==3.22
tqdm==4.66
equistore-core @ git+https://github.com/lab-cosmo/equistore.git@e5b9dc365369ba2584ea01e9d6a4d648008aaab8#subdirectory=python/equistore-core

metatensor-core==0.1.8
cell2mol @ git+https://github.com/lcmd-epfl/cell2mol.git@22473bbf12a013467137a55a63c88fbbdc95baa2
qstack/qstack-qml
Loading

0 comments on commit c6e487e

Please sign in to comment.