From ce9902d7f374cd5f67a1251cba5061014a3d278f Mon Sep 17 00:00:00 2001 From: Johnnie Gray Date: Wed, 18 Dec 2024 14:34:26 -0800 Subject: [PATCH] allow fitting for hyper TNs: add make_overlap and related methods --- quimb/tensor/__init__.py | 2 + quimb/tensor/array_ops.py | 8 ++ quimb/tensor/fitting.py | 93 +++++++----- quimb/tensor/optimize.py | 2 +- quimb/tensor/tensor_builder.py | 75 +++++++++- quimb/tensor/tensor_core.py | 225 ++++++++++++++++++++++++++---- tests/test_tensor/test_fitting.py | 14 ++ 7 files changed, 355 insertions(+), 64 deletions(-) diff --git a/quimb/tensor/__init__.py b/quimb/tensor/__init__.py index a34316d7..f454f855 100644 --- a/quimb/tensor/__init__.py +++ b/quimb/tensor/__init__.py @@ -110,6 +110,7 @@ HTN_dual_from_edges_and_fill_fn, HTN_from_clauses, HTN_from_cnf, + HTN_rand, HTN_random_ksat, MPO_ham_heis, MPO_ham_ising, @@ -281,6 +282,7 @@ "HTN_dual_from_edges_and_fill_fn", "HTN_from_clauses", "HTN_from_cnf", + "HTN_rand", "HTN_random_ksat", "HTN2D_classical_ising_partition_function", "HTN3D_classical_ising_partition_function", diff --git a/quimb/tensor/array_ops.py b/quimb/tensor/array_ops.py index 9890ece4..cd774b0f 100644 --- a/quimb/tensor/array_ops.py +++ b/quimb/tensor/array_ops.py @@ -234,6 +234,14 @@ def norm_fro(x): norm_fro.register("numpy", norm_fro_dense) +@norm_fro.register("autograd") +def norm_fro_autoray(x): + # seems to be bug with autograd's linalg.norm and complex numbers + # https://github.com/HIPS/autograd/issues/666 + # so implement manually + return do("sum", do("abs", x) ** 2) ** 0.5 + + def sensibly_scale(x): """Take an array and scale it *very* roughly such that random tensor networks consisting of such arrays do not have gigantic norms. diff --git a/quimb/tensor/fitting.py b/quimb/tensor/fitting.py index 4eb8ffe4..ecffd6ee 100644 --- a/quimb/tensor/fitting.py +++ b/quimb/tensor/fitting.py @@ -14,6 +14,7 @@ def tensor_network_distance( xBB=None, method="auto", normalized=False, + output_inds=None, **contract_opts, ): r"""Compute the Frobenius norm distance between two tensor networks: @@ -57,6 +58,9 @@ def tensor_network_distance( If ``'infidelity'``, compute the normalized infidelity ``1 - ||^2 / (|A| |B|)``, which can be faster to optimize e.g., but does not take into account normalization. + output_inds : sequence of str, optional + Specify the output indices of `tnA` and `tnB` to contract over. This + can be necessary if either network has hyper indices. contract_opts Supplied to :meth:`~quimb.tensor.tensor_core.TensorNetwork.contract`. @@ -69,12 +73,15 @@ def tensor_network_distance( tnA = tnA.as_network() tnB = tnB.as_network() - oix = tnA.outer_inds() - if set(oix) != set(tnB.outer_inds()): - raise ValueError( - "Can only compute distance between tensor " - "networks with matching outer indices." - ) + if output_inds is None: + oix = tnA.outer_inds() + if set(oix) != set(tnB.outer_inds()): + raise ValueError( + "Can only compute distance between tensor " + "networks with matching outer indices." 
+ ) + else: + oix = output_inds if method == "auto": d = tnA.inds_size(oix) @@ -90,33 +97,40 @@ def tensor_network_distance( # overlap method if xAA is None: - xAA = (tnA | tnA.H).contract(**contract_opts) + # + xAA = tnA.norm(squared=True, output_inds=oix, **contract_opts) if xAB is None: - xAB = (tnA | tnB.H).contract(**contract_opts) + # + xAB = tnA.overlap(tnB, output_inds=oix, **contract_opts) if xBB is None: - xBB = (tnB | tnB.H).contract(**contract_opts) + # + xBB = tnB.norm(squared=True, output_inds=oix, **contract_opts) + + xAA = do("abs", xAA) + xBB = do("abs", xBB) + xAB = do("real", xAB) if normalized == "infidelity": # compute normalized infidelity - return 1 - do("abs", xAB**2 / (xAA * xBB)) + return 1 - xAB**2 / (xAA * xBB) if normalized == "infidelity_sqrt": # compute normalized sqrt infidelity - return 1 - do("abs", xAB / (xAA * xBB) ** 0.5) + return 1 - do("abs", xAB) / (xAA * xBB) ** 0.5 if normalized == "squared": return ( - do("abs", xAA + xBB - 2 * do("real", xAB)) + do("abs", xAA + xBB - 2 * xAB) # divide by average norm-squared of A and B * 2 - / (do("abs", xAA) + do("abs", xBB)) + / (xAA + xBB) ) ** 0.5 - dAB = do("abs", xAA + xBB - 2 * do("real", xAB)) ** 0.5 + dAB = do("abs", xAA + xBB - 2 * xAB) ** 0.5 if normalized: # divide by average norm of A and B - dAB = dAB * 2 / (do("abs", xAA) ** 0.5 + do("abs", xBB) ** 0.5) + dAB = dAB * 2 / (xAA**0.5 + xBB**0.5) return dAB @@ -130,6 +144,7 @@ def tensor_network_fit_autodiff( contract_optimize="auto-hq", distance_method="auto", normalized="squared", + output_inds=None, xBB=None, inplace=False, progbar=False, @@ -164,6 +179,9 @@ def tensor_network_fit_autodiff( If ``'infidelity'``, compute the normalized infidelity ``1 - ||^2 / (|A| |B|)``, which can be faster to optimize e.g., but does not take into account normalization. + output_inds : sequence of str, optional + Specify the output indices of `tnA` and `tnB` to contract over. This + can be necessary if either network has hyper indices. xBB : float, optional If you already know, have computed ``tn_target.H @ tn_target``, or don't care about the overall scale of the norm distance, you can supply @@ -183,9 +201,8 @@ def tensor_network_fit_autodiff( from .tensor_core import tensor_network_distance if xBB is None: - xBB = (tn_target | tn_target.H).contract( - output_inds=(), - optimize=contract_optimize, + xBB = tn_target.norm( + squared=True, output_inds=output_inds, optimize=contract_optimize ) tnopt = TNOptimizer( @@ -196,6 +213,7 @@ def tensor_network_fit_autodiff( "method": distance_method, "optimize": contract_optimize, "normalized": normalized, + "output_inds": output_inds, }, autodiff_backend=autodiff_backend, progbar=progbar, @@ -218,7 +236,7 @@ def vdot_broadcast(x, y): return do("sum", x * do("conj", y), axis=0) -def conjugate_gradient(A, b, x0=None, tol=1e-10, maxiter=1000): +def conjugate_gradient(A, b, x0=None, tol=1e-5, maxiter=1000): """ Conjugate Gradient solver for complex matrices/linear operators. @@ -273,7 +291,7 @@ def _tn_fit_als_core( pos_smudge=1e-15, progbar=False, ): - from .tensor_core import group_inds, TNLinearOperator + from .tensor_core import TNLinearOperator, group_inds if solver == "auto": # XXX: maybe make this depend on local tensor as well? @@ -300,7 +318,7 @@ def _tn_fit_als_core( # should we form the dense operator explicitly? 
if dense_solve == "auto": # only for small enough tensors - dense_i = tk.size <= 2**10 + dense_i = tk.size <= 512 else: dense_i = dense_solve @@ -367,12 +385,9 @@ def _tn_fit_als_core( b = b_tn.to_dense(rix, bix) if solver_i is None: + x0 = tk.to_dense(lix, bix) x = conjugate_gradient( - A, - b, - x0=tk.to_dense(lix, bix), - tol=tol, - maxiter=solver_maxiter, + A, b, x0=x0, tol=tol, maxiter=solver_maxiter ) elif enforce_pos or solver_i == "eigh": el, V = do("linalg.eigh", A) @@ -429,6 +444,7 @@ def tensor_network_fit_als( tnAA=None, tnAB=None, xBB=None, + output_inds=None, contract_optimize="auto-hq", inplace=False, progbar=False, @@ -517,19 +533,25 @@ def tensor_network_fit_als( # form the norm of the varying TN (A) and its overlap with the target (B) if tnAA is None or tnAB is None: - tn_fit_conj = tn_fit.conj().retag_({"__KET__": "__BRA__"}) + tn_fit_conj = tn_fit.conj(mangle_inner=True, output_inds=output_inds) + tn_fit_conj.retag_({"__KET__": "__BRA__"}) else: tn_fit_conj = None + if tnAA is None: - tnAA = tn_fit | tn_fit_conj + # + tnAA = tn_fit.combine( + tn_fit_conj, virtual=True, check_collisions=False + ) if tnAB is None: - tnAB = tn_target | tn_fit_conj - + # + tnAB = tn_target.combine( + tn_fit_conj, virtual=True, check_collisions=False + ) if (tol != 0.0 or progbar) and (xBB is None): # - xBB = (tn_target | tn_target.H).contract( - optimize=contract_optimize, - output_inds=(), + xBB = tn_target.norm( + squared=True, output_inds=output_inds, optimize=contract_optimize ) if pos_smudge is None: @@ -619,10 +641,7 @@ def tensor_network_fit_tree( TensorNetwork """ if xBB is None: - xBB = (tn_target | tn_target.H).contract( - optimize=contract_optimize, - output_inds=(), - ) + xBB = tn_target.norm(squared=True, optimize=contract_optimize) tn_fit = tn.conj(inplace=inplace) tnAB = tn_fit | tn_target diff --git a/quimb/tensor/optimize.py b/quimb/tensor/optimize.py index 6e6650c7..1b4eb9f1 100644 --- a/quimb/tensor/optimize.py +++ b/quimb/tensor/optimize.py @@ -462,7 +462,7 @@ class AutoGradHandler: def __init__(self, device="cpu"): if device != "cpu": raise ValueError( - "`autograd` currently is only " "backed by cpu, numpy arrays." + "`autograd` currently is only backed by cpu, numpy arrays." ) def to_variable(self, x): diff --git a/quimb/tensor/tensor_builder.py b/quimb/tensor/tensor_builder.py index 0f131573..94b56fa0 100644 --- a/quimb/tensor/tensor_builder.py +++ b/quimb/tensor/tensor_builder.py @@ -18,10 +18,10 @@ ) from ..gen.rand import ( choice, + get_rand_fill_fn, rand_phase, randn, random_seed_fn, - get_rand_fill_fn, ) from ..utils import concat, deprecated, unique from .array_ops import asarray, do, reshape, sensibly_scale @@ -831,6 +831,79 @@ def TN_from_strings( return tn +def HTN_rand( + n, + reg, + n_out=0, + n_hyper_in=0, + n_hyper_out=0, + d_min=2, + d_max=3, + seed=None, + dtype="float64", + dist="normal", + scale=1.0, + loc=0.0, + site_ind_id="k{}", +): + """Create a random, possibly hyper, tensor network, with a mix of normal + and hyper inner and outer indices. Useful for testing edges cases. + + Parameters + ---------- + n : int + The number of tensors. + reg : int + The average degree (number of dimensions per tensor) of the tensor + network (prior to placing extra indices). + n_out : int, optional + The number of normal outer indices to add (i.e. appear exactly once). + n_hyper_in : int, optional + The number of hyper inner indices to add (i.e. appear on three or more + tensors). + n_hyper_out : int, optional + The number of hyper outer indices to add (i.e. 
appear in output and + two or more tensors). + d_min : int, optional + The minimum size of any dimension. + d_max : int, optional + The maximum size of any dimension. + seed : int, optional + A random seed. + + Returns + ------- + TensorNetwork + """ + import cotengra as ctg + + fill_fn = get_rand_fill_fn( + dtype=dtype, dist=dist, scale=scale, loc=loc, seed=seed + ) + + inputs, output, shapes, _ = ctg.utils.rand_equation( + n=n, + reg=reg, + n_out=n_out, + n_hyper_in=n_hyper_in, + n_hyper_out=n_hyper_out, + d_min=d_min, + d_max=d_max, + seed=seed, + ) + + # map output symbols to explicitly enumerated indices + outmap = {ix: site_ind_id.format(i) for i, ix in enumerate(output)} + + return TensorNetwork( + Tensor( + data=fill_fn(shape), + inds=[outmap.get(ix, ix) for ix in term], + ) + for term, shape in zip(inputs, shapes) + ) + + def HTN_CP_from_inds_and_fill_fn( fill_fn, inds, diff --git a/quimb/tensor/tensor_core.py b/quimb/tensor/tensor_core.py index f97da7c2..99567493 100644 --- a/quimb/tensor/tensor_core.py +++ b/quimb/tensor/tensor_core.py @@ -2708,7 +2708,7 @@ def idxmax(self, f=None): idx = np.unravel_index(flat_idx, self.shape) return dict(zip(self.inds, idx)) - def norm(self): + def norm(self, squared=False, **contract_opts): r"""Frobenius norm of this tensor: .. math:: @@ -2718,7 +2718,35 @@ def norm(self): where the trace is taken over all indices. Equivalent to the square root of the sum of squared singular values across any partition. """ - return norm_fro(self.data) + # NOTE: for compatibility with TN.norm, we accept contract_opts + norm = norm_fro(self.data) + if squared: + return norm ** 2 + return norm + + def overlap(self, other, **contract_opts): + r"""Overlap of this tensor with another tensor: + + .. math:: + + \langle o | t \rangle = \mathrm{Tr} \left(o^{\dagger} t\right) + + where the trace is taken over all indices. + + Parameters + ---------- + other : Tensor or TensorNetwork + The other tensor or network to overlap with. This tensor will be + conjugated. + + Returns + ------- + scalar + """ + if isinstance(other, Tensor): + return other.conj() @ self + else: + return conj(other.overlap(self, **contract_opts)) def normalize(self, inplace=False): T = self if inplace else self.copy() @@ -4056,6 +4084,7 @@ def __ior__(self, tensor): return self def _modify_tensor_tags(self, old, new, tid): + # XXX: change to generators here? self._unlink_tags(old - new, tid) self._link_tags(new - old, tid) @@ -4266,8 +4295,25 @@ def mangle_inner_(self, append=None, which=None): self.reindex_(reindex_map) return self - def conj(self, mangle_inner=False, inplace=False): - """Conjugate all the tensors in this network (leaves all indices).""" + def conj(self, mangle_inner=False, output_inds=None, inplace=False): + """Conjugate all the tensors in this network (leave all outer indices). + + Parameters + ---------- + mangle_inner : {bool, str, None}, optional + Whether to mangle the inner indices of the network. If a string is + given, it will be appended to the index names. + output_inds : sequence of str, optional + If given, the indices to mangle will be restricted to those not in + this list. This is only needed for (hyper) tensor networks where + output indices are not given simply by those that appear once. + inplace : bool, optional + Whether to perform the conjugation inplace or not. 
+
+        Returns
+        -------
+        TensorNetwork
+        """
         tn = self if inplace else self.copy()
 
         for t in tn:
@@ -4275,7 +4321,14 @@ def conj(self, mangle_inner=False, inplace=False):
 
         if mangle_inner:
             append = None if mangle_inner is True else str(mangle_inner)
-            tn.mangle_inner_(append)
+
+            # allow explicitly setting which indices to mangle
+            if output_inds is None:
+                which = None
+            else:
+                which = oset(tn.ind_map) - tags_to_oset(output_inds)
+
+            tn.mangle_inner_(append=append, which=which)
 
         # if we have fermionic data, need to phase dual outer indices
         ex_data = next(iter(tn.tensor_map.values())).data
@@ -4313,7 +4366,62 @@ def largest_element(self):
         """
         return prod(t.largest_element() for t in self)
 
-    def norm(self, **contract_opts):
+    def make_norm(
+        self,
+        mangle_append="*",
+        layer_tags=("KET", "BRA"),
+        output_inds=None,
+        return_all=False,
+    ):
+        """Make the norm (squared) tensor network of this tensor network
+        ``tn.H & tn``. This deterministically mangles the inner indices of the
+        bra to avoid clashes with the ket, and also adds tags to the top and
+        bottom layers. If the tensor network has hyper outer indices, you may
+        need to specify the output indices. This allows 'hyper' norms.
+
+        Parameters
+        ----------
+        mangle_append : {str, False or None}, optional
+            How to mangle the inner indices of the bra.
+        layer_tags : (str, str), optional
+            The tags to identify the top and bottom.
+        output_inds : sequence of str, optional
+            If given, the indices to mangle will be restricted to those not in
+            this list. This is only needed for (hyper) tensor networks where
+            output indices are not given simply by those that appear once.
+        return_all : bool, optional
+            Return the norm, the ket and the bra. These are virtual, i.e. are
+            views of the same tensors.
+
+        Returns
+        -------
+        tn_norm : TensorNetwork
+        """
+        ket = self.copy()
+
+        if layer_tags:
+            ket.add_tag(layer_tags[0])
+
+        bra = ket.conj(
+            mangle_inner=mangle_append,
+            output_inds=output_inds,
+        )
+        if layer_tags:
+            bra.retag_({layer_tags[0]: layer_tags[1]})
+
+        norm = ket.combine(
+            bra,
+            # both are already copies
+            virtual=True,
+            # mangling already avoids clashes
+            check_collisions=not mangle_append
+        )
+
+        if return_all:
+            return norm, ket, bra
+        return norm
+
+    def norm(self, output_inds=None, squared=False, **contract_opts):
         r"""Frobenius norm of this tensor network. Computed by exactly
         contracting the TN with its conjugate:
 
@@ -4324,41 +4432,109 @@ def norm(self, **contract_opts):
         where the trace is taken over all indices. Equivalent to the square
         root of the sum of squared singular values across any partition.
         """
-        norm = self | self.conj()
-        return norm.contract(**contract_opts) ** 0.5
+        tn_norm = self.make_norm(output_inds=output_inds, layer_tags=None)
+        norm2 = tn_norm.contract(output_inds=(), **contract_opts)
+        if squared:
+            return norm2
+        return norm2 ** 0.5
 
-    def make_norm(
+    def make_overlap(
         self,
-        mangle_append="*",
+        other,
         layer_tags=("KET", "BRA"),
+        output_inds=None,
         return_all=False,
     ):
-        """Make the norm tensor network of this tensor network ``tn.H & tn``.
+        """Make the overlap tensor network of this tensor network with another
+        tensor network ``other.H & self``. This deterministically mangles the
+        inner indices of the bra to avoid clashes with the ket, and also adds
+        tags to the top and bottom layers. If the tensor network has hyper
+        outer indices, you may need to specify the output indices. This allows
+        'hyper' overlaps.
Parameters ---------- - mangle_append : {str, False or None}, optional - How to mangle the inner indices of the bra. + other : TensorNetwork + The other tensor network to overlap with, it should have the same + outer indices as this tensor network, all other indices will be + explicitly mangled in the copy taken, allowing 'hyper' overlaps. + This tensor network will be conjugated in the overlap. layer_tags : (str, str), optional The tags to identify the top and bottom. + output_inds : sequence of str, optional + If given, the indices to mangle will be restricted to those not in + this list. This is only needed for (hyper) tensor networks where + output indices are not given simply by those that appear once. return_all : bool, optional - Return the norm, the ket and the bra. + Return the overlap, the ket and the bra. These are virtual, i.e. + are views of the same tensors. Returns ------- - norm : TensorNetwork + tn_overlap : TensorNetwork """ ket = self.copy() - ket.add_tag(layer_tags[0]) + if layer_tags: + ket.add_tag(layer_tags[0]) - bra = ket.retag({layer_tags[0]: layer_tags[1]}) - bra.conj_(mangle_append) + if output_inds is None: + output_inds = ket.outer_inds() + + bra = other.as_network().conj( + mangle_inner=True, + output_inds=output_inds, + ) + if layer_tags: + bra.add_tag(layer_tags[1]) - norm = ket | bra + overlap = ket.combine( + bra, + # both are already copies + virtual=True, + # mangling already avoids clashes + check_collisions=False, + ) if return_all: - return norm, ket, bra - return norm + return overlap, ket, bra + + return overlap + + def overlap(self, other, output_inds=None, **contract_opts): + r"""Overlap of this tensor network with another tensor network. Computed + by exactly contracting the TN with the conjugate of the other TN: + + .. math:: + + \langle O, T \rangle = \mathrm{Tr} \left(O^{\dagger} T\right) + + where the trace is taken over all indices. This supports 'hyper' + tensor networks, where the output indices are not simply those that + appear once. + + Parameters + ---------- + other : TensorNetwork + The other tensor network to overlap with, it should have the same + outer indices as this tensor network, all other indices will be + explicitly mangled in the copy taken, allowing 'hyper' overlaps. + This tensor network will be conjugated in the overlap. + output_inds : sequence of str, optional + If given, the indices to mangle will be restricted to those not in + this list. This is only needed for (hyper) tensor networks where + output indices are not given simply by those that appear once. + contract_opts + Supplied to :meth:`~quimb.tensor.tensor_contract` for the + contraction. + + Returns + ------- + scalar + """ + tn_overlap = self.make_overlap( + other, output_inds=output_inds, layer_tags=None + ) + return tn_overlap.contract(output_inds=(), **contract_opts) def multiply(self, x, inplace=False, spread_over=8): """Scalar multiplication of this tensor network with ``x``. @@ -4760,10 +4936,9 @@ def _select_tids(self, tids, virtual=True): """Get a copy or a virtual copy (doesn't copy the tensors) of this ``TensorNetwork``, only with the tensors corresponding to ``tids``. 
""" - tn = TensorNetwork(()) + tn = self.new(like=self) for tid in tids: tn.add_tensor(self.tensor_map[tid], tid=tid, virtual=virtual) - tn.view_like_(self) return tn def _select_without_tids(self, tids, virtual=True): @@ -9121,12 +9296,12 @@ def fit( tn_target = tn_target.as_network() + if method == "als": + return tensor_network_fit_als(self, tn_target, **fitting_opts) if method == "autodiff": return tensor_network_fit_autodiff(self, tn_target, **fitting_opts) elif method == "tree": return tensor_network_fit_tree(self, tn_target, **fitting_opts) - elif method == "als": - return tensor_network_fit_als(self, tn_target, **fitting_opts) else: raise ValueError( f"Unrecognized method {method}. Should be one of: " diff --git a/tests/test_tensor/test_fitting.py b/tests/test_tensor/test_fitting.py index a509a233..3639a15f 100644 --- a/tests/test_tensor/test_fitting.py +++ b/tests/test_tensor/test_fitting.py @@ -136,3 +136,17 @@ def test_fit_partial_tags(opts, dtype): assert (k1f[2] - k1[2]).norm() > 1e-12 assert (k1f[3] - k1[3]).norm() < 1e-12 assert (k1f[4] - k1[4]).norm() > 1e-12 + + +def test_hyper_distance(): + tna = qtn.HTN_rand(10, 3, 2, 2, 2, 2, 2, seed=0, dtype="complex128") + tnb = qtn.HTN_rand(10, 3, 2, 2, 2, 2, 2, seed=1, dtype="complex128") + oix = [f"k{i}" for i in range(4)] + xa = tna.to_dense(oix) + xb = tnb.to_dense(oix) + assert tna.norm(output_inds=oix) == pytest.approx(np.linalg.norm(xa)) + assert tnb.norm(output_inds=oix) == pytest.approx(np.linalg.norm(xb)) + assert tnb.overlap(tna, output_inds=oix) == pytest.approx(np.vdot(xa, xb)) + assert tna.distance(tnb, output_inds=oix) == pytest.approx( + np.linalg.norm(xa - xb) + )