Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

small-fixes #72

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a5c60c5
Fixed `printlevel` parameter and missing case
YAY-C Jun 30, 2024
f30deb8
Dirty fix for open-shell evaluation (closes #71)
YAY-C Jul 2, 2024
94d2d43
fixed is-statement for numpy.ndarrays
YAY-C Jul 2, 2024
5e2e26f
Added `srcdir` kwarg for xyz-lists
YAY-C Jul 2, 2024
74f729a
Add test for list of xyz-files and add test-data
YAY-C Jul 2, 2024
9bb37c2
Fix flattening of atom symbols
YAY-C Jul 4, 2024
5d46fa2
Add `srcdir` argument to single representation loading
YAY-C Jul 24, 2024
ead08aa
Fix `only-z` argument passing and fixed dimensions for `all_atoms`
YAY-C Jul 25, 2024
ca8b08b
remove unnecessary `squeeze()`
YAY-C Oct 23, 2024
80b18b4
Merge branch 'master' into small-fixes
YAY-C Nov 15, 2024
049ddb1
spin/charge loader for .txt file only
YAY-C Dec 6, 2024
69f67f7
missing charge/spin test txt-file
YAY-C Dec 6, 2024
96f05f2
removed `spin` ARG in `bond()`
YAY-C Dec 6, 2024
e3a951d
fixed spin evaluation and output-filename
YAY-C Dec 6, 2024
8a4afbb
add closed-shell rep test and fixed `test_ecp`
YAY-C Dec 6, 2024
878ef50
should fix GH-checks
YAY-C Dec 18, 2024
ccee36e
re-fix yml env
YAY-C Dec 18, 2024
2d65da9
fix default species determination (now on-the-fly)
YAY-C Dec 18, 2024
f12b9aa
try another fix
YAY-C Dec 18, 2024
b7da960
updated nupmy version and exported new conda env
YAY-C Dec 19, 2024
e1c9cca
final fix (I swear to GOD)
YAY-C Dec 19, 2024
8e9ee0e
Merge branch 'master' into small-fixes
YAY-C Dec 20, 2024
d79bac3
Merge branch 'master' into small-fixes
YAY-C Jan 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions qstack/spahm/rho/bond.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
import os
import argparse
import numpy as np
from itertools import chain
from qstack.tools import correct_num_threads
from . import utils, dmb_rep_bond as dmbb
from .utils import defaults

def bond(mols, dms,
bpath=defaults.bpath, cutoff=defaults.cutoff, omods=defaults.omod,
spin=None, elements=None, only_m0=False, zeros=False, printlevel=0,
elements=None, only_m0=False, zeros=False, printlevel=0,
pairfile=None, dump_and_exit=False, same_basis=False, only_z=[]):
""" Computes SPAHM-b representations for a set of molecules.

Expand All @@ -19,7 +20,6 @@ def bond(mols, dms,
- bpath (str): path to the directory containing bond-optimized basis-functions (.bas)
- cutoff (float): the cutoff distance (angstrom) between atoms to be considered as bond
- omods (list of str): the selected mode for open-shell computations
- spin (list of int): list of spins for each molecule
- elements (list of str): list of all elements present in the set of molecules
- only_m0 (bool): use only basis functions with `m=0`
- zeros (bool): add zeros features for non-existing bond pairs
Expand All @@ -39,8 +39,6 @@ def bond(mols, dms,
elements, mybasis, qqs0, qqs4q, idx, M = dmbb.read_basis_wrapper(mols, bpath, only_m0, printlevel,
elements=elements, cutoff=cutoff,
pairfile=pairfile, dump_and_exit=dump_and_exit, same_basis=same_basis)
if spin is None:
omods = [None]
qqs = qqs0 if zeros else qqs4q
maxlen = max([dmbb.bonds_dict_init(qqs[q0], M)[1] for q0 in elements])
if len(only_z) > 0:
Expand All @@ -58,7 +56,7 @@ def bond(mols, dms,
for imol, (mol, dm) in enumerate(zip(mols,dms)):
if printlevel>0: print('mol', imol, flush=True)
for iomod, omod in enumerate(omods):
DM = utils.dm_open_mod(dm, omod) if spin else dm
YAY-C marked this conversation as resolved.
Show resolved Hide resolved
DM = utils.dm_open_mod(dm, omod)
vec = dmbb.repr_for_mol(mol, DM, qqs, M, mybasis, idx, maxlen, cutoff, only_z=only_z)
allvec[iomod,imol,:len(vec)] = vec

Expand Down Expand Up @@ -112,23 +110,25 @@ def get_repr(mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=None,
dms = []

if len(only_z) > 0:
all_atoms = np.array([z for mol in mols for z in mol.elements if z in only_z])
all_atoms = np.array([z for mol in mols for z in mol.elements if z in only_z], ndmin=2)
else:
all_atoms = np.array([mol.elements for mol in mols])
spin = np.array(spin) ## a bit dirty but couldn't find a better way to ensure Iterable type!
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a pity that it has to be done twice when calling the main() script, but couldn't find any work around !

if (spin == None).all():
omods = [None]

allvec = bond(mols, dms, bpath, cutoff, omods,
spin=spin, elements=elements,
only_m0=only_m0, zeros=zeros, printlevel=printlevel,
elements=elements, only_m0=only_m0,
zeros=zeros, printlevel=printlevel,
pairfile=pairfile, dump_and_exit=dump_and_exit, same_basis=same_basis, only_z=only_z)
maxlen=allvec.shape[-1]
natm = allvec.shape[-2]

if split is False:
shape = (len(omods), -1, maxlen)
atidx = np.where(np.array([[1]*len(zin) + [0]*(natm-len(zin)) for zin in all_atoms]).flatten())
allvec = allvec.reshape(shape)[:,atidx,:].reshape(shape)
all_atoms = all_atoms.flatten()
allvec = allvec.squeeze()
all_atoms = list(chain.from_iterable(all_atoms))
#allvec = allvec.squeeze()
elif with_symbols:
msg = f"You can not use 'split=True' and 'with_symbols=True' at the same time!"
raise RuntimeError()
Expand Down Expand Up @@ -180,8 +180,8 @@ def main():

if args.filename.endswith('xyz'):
xyzlist = [args.filename]
charge = [int(args.charge) if args.charge is not None else 0]
spin = [int(args.spin) if args.spin is not None else None]
charge = np.array([int(args.charge) if args.charge is not None else 0])
spin = np.array([int(args.spin) if args.spin is not None else None])
else:
xyzlistfile = args.filename
xyzlist = utils.get_xyzlist(xyzlistfile)
Expand All @@ -192,10 +192,13 @@ def main():
reps = get_repr(mols, xyzlist, args.guess, xc=args.xc, spin=spin, readdm=args.readdm, printlevel=args.print,
pairfile=args.pairfile, dump_and_exit=args.dump_and_exit, same_basis=args.same_basis,
bpath=args.bpath, cutoff=args.cutoff, omods=args.omod, with_symbols=args.with_symbols,
elements=args.elements, only_m0=args.only_m0, zeros=args.zeros, split=args.split)
elements=args.elements, only_m0=args.only_m0, zeros=args.zeros, split=args.split, only_z=args.only_z)
if args.print > 0: print(reps.shape)
if args.merge:
np.save(args.name_out+'_'+'_'.join(args.omod), reps)
if (spin == None).all():
np.save(args.name_out, reps)
else:
np.save(args.name_out+'_'+'_'.join(args.omod), reps)
else:
for vec, omod in zip(reps, args.omod):
np.save(args.name_out+'_'+omod, vec)
Expand Down
34 changes: 23 additions & 11 deletions qstack/spahm/rho/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,29 @@


def get_chsp(fname, n):
if fname:
chsp = np.loadtxt(fname, dtype=int, ndmin=1)
def chsp_converter(chsp):
if chsp == 'None':
chsp = None
else:
chsp = int(chsp)
return chsp
if os.path.isfile(fname):
chsp = np.loadtxt(fname, dtype=object, converters=chsp_converter, encoding=None)
if(len(chsp)!=n):
raise RuntimeError(f'Wrong lengh of the file {fname}')
else:
chsp = np.zeros(n, dtype=int)
raise RuntimeError(f"{fname} can not be found")
return chsp


def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None, progress=False):
def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None, progress=False, srcdir=None):
mols = []
if progress:
import tqdm
xyzlist = tqdm.tqdm(xyzlist)
for xyzfile, ch, sp in zip(xyzlist, charge, spin):
if srcdir is not None:
xyzfile = srcdir+xyzfile
if printlevel>0: print(xyzfile, flush=True)
mols.append(compound.xyz_to_mol(xyzfile, basis,
charge=0 if ch is None else ch,
Expand All @@ -45,14 +53,14 @@ def load_mols(xyzlist, charge, spin, basis, printlevel=0, units='ANG', ecp=None,
return mols


def mols_guess(mols, xyzlist, guess, xc=defaults.xc, spin=None, readdm=False, printlevel=0):
def mols_guess(mols, xyzlist, guess, xc=defaults.xc, spin=[None], readdm=False, printlevel=0):
dms = []
guess = guesses.get_guess(guess)
for xyzfile, mol in zip(xyzlist, mols):
for xyzfile, mol, sp in zip(xyzlist, mols, spin):
if printlevel>0: print(xyzfile, flush=True)
if not readdm:
e, v = spahm.get_guess_orbitals(mol, guess, xc=xc)
dm = guesses.get_dm(v, mol.nelec, mol.spin if spin is not None else None)
dm = guesses.get_dm(v, mol.nelec, mol.spin if sp is not None else None)
else:
dm = np.load(readdm+'/'+os.path.basename(xyzfile)+'.npy')
if spin and dm.ndim==2:
Expand All @@ -66,7 +74,8 @@ def dm_open_mod(dm, omod):
dmmod = {'sum': lambda dm: dm[0]+dm[1],
'diff': lambda dm: dm[0]-dm[1],
'alpha': lambda dm: dm[0],
'beta': lambda dm: dm[1]}
'beta': lambda dm: dm[1],
None: lambda dm: dm}
return dmmod[omod](dm)


Expand Down Expand Up @@ -134,8 +143,8 @@ def load_reps(f_in, from_list=True, srcdir=None, with_labels=False,
else:
is_single, is_labeled = file_format['is_single'], file_format['is_labeled']
# if the given file contains a single representation create a one-element list
Xs = [np.load(f_in, allow_pickle=True)] if is_single else np.load(f_in, allow_pickle=True)
print(f"Loading {len(Xs)} representations (local = {local}, labeled = {is_labeled})")
Xs = [np.load(os.path.join(path2list,f_in), allow_pickle=True)] if is_single else np.load(os.path.join(path2list,f_in), allow_pickle=True)
if printlevel > 1: print(f"Loading {len(Xs)} representations (local = {local}, labeled = {is_labeled})")
if progress:
import tqdm
Xs = tqdm.tqdm(Xs)
Expand All @@ -150,7 +159,10 @@ def load_reps(f_in, from_list=True, srcdir=None, with_labels=False,
reps.extend(x[:,1])
labels.extend(x[:,0])
else:
reps.extend(x)
if sum_local:
reps.append(x.sum(axis=0))
else:
reps.extend(x)
else:
if is_labeled:
reps.append(x[1])
Expand Down
Binary file added tests/data/H2O_spahm_b.npy
Binary file not shown.
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/data/list_water.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
H2O_dist.xyz
H2O.xyz
rotated_H2O.xyz
3 changes: 3 additions & 0 deletions tests/data/list_water_charges.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
0
0
0
3 changes: 3 additions & 0 deletions tests/data/list_water_spins.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
0
0
0
39 changes: 33 additions & 6 deletions tests/test_spahm_b.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,31 @@ def test_water():
mols = utils.load_mols([xyz_in], [0], [0], 'minao')
dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0])
X = bond.get_repr(mols, [xyz_in], 'LB', spin=[0], with_symbols=False, same_basis=False)
#X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main
true_file = path+'/data/H2O_spahm_b.npy_alpha_beta.npy'
X_true = np.load(true_file)
assert(X_true.shape == X.shape)
for Xa, Xa_true in zip(X, X_true):
assert(np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8)

def test_water_closed():
path = os.path.dirname(os.path.realpath(__file__))
xyz_in = path+'/data/H2O.xyz'
mols = utils.load_mols([xyz_in], [None], [0], 'minao')
dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[None])
X = bond.get_repr(mols, [xyz_in], 'LB', spin=[None], with_symbols=False, same_basis=False)
true_file = path+'/data/H2O_spahm_b.npy'
X_true = np.load(true_file)
print(X_true.shape)
assert(X_true.shape == X.shape)
for Xa, Xa_true in zip(X, X_true):
assert(np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8)

def test_water_O_only():
path = os.path.dirname(os.path.realpath(__file__))
xyz_in = path+'/data/H2O.xyz'
mols = utils.load_mols([xyz_in], [0], [0], 'minao')
dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0])
X = bond.bond(mols, dms, spin=[0], only_z=['O'])
X = bond.bond(mols, dms, only_z=['O'])
X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat)
X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main
true_file = path+'/data/H2O_spahm_b.npy_alpha_beta.npy'
Expand All @@ -36,7 +48,7 @@ def test_water_same_basis():
xyz_in = path+'/data/H2O.xyz'
mols = utils.load_mols([xyz_in], [0], [0], 'minao')
dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0])
X = bond.bond(mols, dms, spin=[0], same_basis=True)
X = bond.bond(mols, dms, same_basis=True)
X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat)
X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main
true_file = path+'/data/H2O_spahm_b_CCbas.npy_alpha_beta.npy'
Expand All @@ -48,9 +60,9 @@ def test_water_same_basis():
def test_ecp():
path = os.path.dirname(os.path.realpath(__file__))
xyz_in = path+'/data/I2.xyz'
mols = utils.load_mols([xyz_in], [0], [None], 'minao', ecp='def2-svp')
dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[None])
Comment on lines -51 to -52
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was actually weird ! because even specifying spin=None the func would still returned merged alpha-beta representations (which is the X_true we loaded and evaluated) !

But it made me realized that it's a bit dangerous now too, because if you only use the bond() function with a single dm (for all electrons) and you don't change the parameters omods=[None] (default is [alpha, beta]) then you get an error ! (... I think ...)

X = bond.bond(mols, dms, spin=[None], same_basis=True)
mols = utils.load_mols([xyz_in], [0], [0], 'minao', ecp='def2-svp')
dms = utils.mols_guess(mols, [xyz_in], 'LB', spin=[0])
X = bond.bond(mols, dms, same_basis=True)
X = np.squeeze(X) #contains a single elements but has shape (1,Nfeat)
X = np.hstack(X) # merging alpha-beta components for spin unrestricted representation #TODO: should be included into function not in main
true_file = path+'/data/I2_spahm-b_minao-def2-svp_alpha-beta.npy'
Expand All @@ -59,7 +71,22 @@ def test_ecp():
for Xa, Xa_true in zip(X, X_true):
assert(np.linalg.norm(Xa-Xa_true) < 1e-8) # evaluating representation diff as norm (threshold = 1e-8)

def test_from_list():
path = os.path.dirname(os.path.realpath(__file__))
path2list = path+'/data/list_water.txt'
path2spins = path+'/data/list_water_spins.txt'
path2charges = path+'/data/list_water_charges.txt'
xyzlist = utils.get_xyzlist(path2list)
spins = utils.get_chsp(path2spins, len(xyzlist))
charges = utils.get_chsp(path2charges, len(xyzlist))
mols = utils.load_mols(xyzlist, charges, spins, 'minao', srcdir=path+'/data/')
spahm_b = bond.get_repr(mols, xyzlist, 'LB', spin=spins, same_basis=True)
Xtrue = np.load(path+'/data/list_H2O_spahm-b_minao_LB_alpha-beta.npy')
assert(np.allclose(Xtrue, spahm_b))


if __name__ == '__main__':
test_water()
test_from_list()
test_water_closed()

Loading