diff --git a/applications/confusion/dino_paper/dino_training.py b/applications/confusion/dino_paper/dino_training.py index 81cc587..f6e4fbe 100644 --- a/applications/confusion/dino_paper/dino_training.py +++ b/applications/confusion/dino_paper/dino_training.py @@ -59,12 +59,7 @@ parser.add_argument("-batch_rank", dest='batch_rank',required=False, default = 50, help="batch rank parameter used in sketching of Jacobian information",type=int) parser.add_argument("-l2_weight", dest='l2_weight',required=False, default = 1., help="weight for l2 term",type=float) parser.add_argument("-h1_weight", dest='h1_weight',required=False, default = 1., help="weight for h1 term",type=float) -parser.add_argument("-left_weight", dest='left_weight',required=False, default = 1., help="weight for left optimization constraint",type=float) -parser.add_argument("-right_weight", dest='right_weight',required=False, default = 1., help="weight for right optimization constraint",type=float) -# Constraint sketching -parser.add_argument("-constraint_sketching", dest='constraint_sketching',required=False, default = 0, help="to constraint sketch or not",type=int) -parser.add_argument("-output_sketch_dim", dest='output_sketch_dim',required=False, default = 50, help="output sketching rank",type=int) -parser.add_argument("-input_sketch_dim", dest='input_sketch_dim',required=False, default = 50, help="input sketching rank",type=int) + # Full J training parser.add_argument("-train_full_jacobian", dest='train_full_jacobian',required=False, default = 1, help="full J",type=int) diff --git a/applications/hyperelasticity/dino_paper/dino_training.py b/applications/hyperelasticity/dino_paper/dino_training.py new file mode 100644 index 0000000..1fe4d21 --- /dev/null +++ b/applications/hyperelasticity/dino_paper/dino_training.py @@ -0,0 +1,116 @@ +# This file is part of the dino package +# +# dino is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 2 of the License, or any later version. +# +# dino is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# If not, see . 
+#
+# Author: Tom O'Leary-Roseberry
+# Contact: tom.olearyroseberry@utexas.edu
+import os, sys
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+os.environ['KMP_DUPLICATE_LIB_OK']='True'
+os.environ["KMP_WARNINGS"] = "FALSE"
+import numpy as np
+import tensorflow as tf
+if int(tf.__version__[0]) > 1:
+    import tensorflow.compat.v1 as tf
+    tf.disable_v2_behavior()
+
+import time
+import pickle
+
+sys.path.append( os.environ.get('DINO_PATH'))
+from dino import *
+
+# Import hyperelasticity problem specifics
+sys.path.append('../')
+from hyperelasticityModelSettings import hyperelasticity_problem_settings
+
+try:
+    tf.random.set_seed(0)
+except:
+    tf.set_random_seed(0)
+
+from argparse import ArgumentParser
+
+# Arguments to be parsed from the command line execution
+parser = ArgumentParser(add_help=True)
+# Architectural parameters
+parser.add_argument("-architecture", dest='architecture',required=False, default = 'as_dense', help="architecture type: as_dense or generic_dense",type=str)
+parser.add_argument("-fixed_input_rank", dest='fixed_input_rank',required=False, default = 50, help="rank for input of AS network",type=int)
+parser.add_argument("-fixed_output_rank", dest='fixed_output_rank',required=False, default = 50, help="rank for output of AS network",type=int)
+parser.add_argument("-network_name", dest='network_name',required=True, help="output name for the saved weights",type=str)
+
+# Optimization parameters
+parser.add_argument("-total_epochs", dest='total_epochs',required=False, default = 1, help="total epochs for training",type=int)
+
+# Loss function parameters
+parser.add_argument("-target_rank", dest='target_rank',required=False, default = 50, help="target rank to be learned for Jacobian information",type=int)
+parser.add_argument("-batch_rank", dest='batch_rank',required=False, default = 50, help="batch rank parameter used in sketching of Jacobian information",type=int)
+parser.add_argument("-l2_weight", dest='l2_weight',required=False, default = 1., help="weight for l2 term",type=float)
+parser.add_argument("-h1_weight", dest='h1_weight',required=False, default = 1., help="weight for h1 term",type=float)
+
+# Full J training
+parser.add_argument("-train_full_jacobian", dest='train_full_jacobian',required=False, default = 1, help="full J",type=int)
+
+
+parser.add_argument("-train_data_size", dest='train_data_size',required=False, default = 1*1024, help="training data size",type=int)
+parser.add_argument("-test_data_size", dest='test_data_size',required=False, default = 1024, help="testing data size",type=int)
+
+args = parser.parse_args()
+
+# jacobian_network = None
+problem_settings = hyperelasticity_problem_settings()
+
+
+settings = jacobian_network_settings(problem_settings)
+
+n_obs = 50
+correlation_length = 0.3
+nx = 64
+settings['data_dir'] = '../data/hyperelasticity_n_obs_'+str(n_obs)+'_correlation_length'+str(correlation_length)+'_nx'+str(nx)+'/'
+
+settings['target_rank'] = args.target_rank
+settings['batch_rank'] = args.batch_rank
+
+settings['train_data_size'] = args.train_data_size
+settings['test_data_size'] = args.test_data_size
+
+settings['architecture'] = args.architecture
+settings['depth'] = 6
+
+settings['fixed_input_rank'] = args.fixed_input_rank
+settings['fixed_output_rank'] = args.fixed_output_rank
+settings['truncation_dimension'] = args.fixed_input_rank
+
+settings['train_full_jacobian'] = args.train_full_jacobian
+
+
+if (settings['batch_rank'] == settings['target_rank']):
+    settings['outer_epochs'] = 1
+    settings['opt_parameters']['keras_epochs'] =
args.total_epochs +else: + settings['shuffle_every_epoch'] = True + settings['outer_epochs'] = args.total_epochs + settings['opt_parameters']['keras_epochs'] = 1 +settings['opt_parameters']['keras_verbose'] = True + +settings['opt_parameters']['loss_weights'] = [args.l2_weight,args.h1_weight] + +settings['network_name'] = args.network_name + +if args.l2_weight != 1.0: + settings['network_name'] += 'l2_weight_'+str(args.l2_weight) + + +jacobian_network = jacobian_training_driver(settings) + + diff --git a/applications/hyperelasticity/generateHyperelasticity.py b/applications/hyperelasticity/generateHyperelasticity.py new file mode 100644 index 0000000..86e93bc --- /dev/null +++ b/applications/hyperelasticity/generateHyperelasticity.py @@ -0,0 +1,31 @@ +# This file is part of the dino package +# +# dino is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 2 of the License, or any later version. +# +# dino is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# If not, see . +# +# Author: Tom O'Leary-Roseberry +# Contact: tom.olearyroseberry@utexas.edu + +import os +import numpy as np + +corrs = [0.3] + +nxnys = [(64,64)] + +for correlation_length in corrs: + for nx,ny in nxnys: + print(80*'#') + print(('Running for corr = '+str(correlation_length)+' nx,ny = '+str((nx,ny))).center(80)) + os.system('mpirun -n 1 python hyperelasticityProblemSetup.py -ninstance 1 -correlation_length '+str(correlation_length)+' -nx '+str(nx)+' -ny '+str(ny)) + + diff --git a/applications/hyperelasticity/hyperelasticityModelSettings.py b/applications/hyperelasticity/hyperelasticityModelSettings.py new file mode 100644 index 0000000..b2adda6 --- /dev/null +++ b/applications/hyperelasticity/hyperelasticityModelSettings.py @@ -0,0 +1,44 @@ +# This file is part of the dino package +# +# dino is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 2 of the License, or any later version. +# +# dino is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# If not, see . 
+# +# Author: Tom O'Leary-Roseberry +# Contact: tom.olearyroseberry@utexas.edu + +import math + +def hyperelasticity_problem_settings(settings = {}): + """ + """ + + settings['ntargets'] = 100 + settings['nx_targets'] = 10 + settings['ny_targets'] = 10 + settings['correlation_length'] = 0.3 + settings['nx'] = 64 + settings['ny'] = 64 + settings['ndim'] = 2 + settings['jacobian_full_rank'] = 50 + settings['formulation'] = 'hyperelasticity' + settings['rel_noise'] = 0.01 + + # For prior anisotropic tensor + settings['prior_theta0'] = 2.0 + settings['prior_theta1'] = 0.5 + settings['prior_alpha'] = math.pi/4 + + # For sampling + settings['seed'] = 1 + + + return settings \ No newline at end of file diff --git a/applications/hyperelasticity/hyperelasticityModelUtilities.py b/applications/hyperelasticity/hyperelasticityModelUtilities.py new file mode 100644 index 0000000..9abfce8 --- /dev/null +++ b/applications/hyperelasticity/hyperelasticityModelUtilities.py @@ -0,0 +1,443 @@ +# This file is part of the dino package +# +# dino is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 2 of the License, or any later version. +# +# dino is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# If not, see . +# +# Author: Tom O'Leary-Roseberry +# Contact: tom.olearyroseberry@utexas.edu + +import dolfin as dl +import ufl +import math +import numpy as np +import matplotlib.pyplot as plt +import sys, os +import time +sys.path.append( os.environ.get('HIPPYLIB_PATH', "../") ) +import hippylib as hp + +sys.path.append( os.environ.get('HIPPYFLOW_PATH')) +import hippyflow as hf + +import logging +logging.getLogger('FFC').setLevel(logging.WARNING) +logging.getLogger('UFL').setLevel(logging.WARNING) +dl.set_log_active(False) + +np.random.seed(seed=0) + +from hyperelasticityModelSettings import hyperelasticity_problem_settings + + +def HyperelasticityPrior(Vh_PARAMETER, correlation_length, mean=None, anis_diff=None): + var = correlation_length / 0.16 + # Delta and gamma + delta = (var * correlation_length) ** (-0.5) + gamma = delta * correlation_length ** 2 + if anis_diff is None: + theta0 = 1. + theta1 = 1. + alpha = math.pi / 4. 
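+        # With theta0 = theta1 = 1 the default anisotropy tensor is isotropic.
+        # Illustrative check of the scalings above (worked example, not from the source):
+        # for correlation_length = 0.3, var = 0.3/0.16 = 1.875,
+        # delta = (1.875*0.3)**(-0.5) = 4/3, and gamma = (4/3)*0.3**2 = 0.12.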
+        anis_diff = dl.CompiledExpression(hp.ExpressionModule.AnisTensor2D(), degree=1)
+        anis_diff.set(theta0, theta1, alpha)
+    if mean is None:
+        return hp.BiLaplacianPrior(Vh_PARAMETER, gamma, delta, anis_diff, robin_bc=True)
+    else:
+        return hp.BiLaplacianPrior(Vh_PARAMETER, gamma, delta, anis_diff, mean=mean, robin_bc=True)
+
+
+class hyperelasticity_varf:
+
+    def __init__(self,Vh,my_ds,traction,body_force=None,mean = None):
+        self.Vh = Vh
+        self.my_ds = my_ds
+        self.traction = traction
+        if body_force is None:
+            self.body_force = dl.Constant((0.0,0.0))
+        else:
+            self.body_force = body_force
+
+        # Use one consistent attribute name in both branches
+        if mean is None:
+            self.mean_function = dl.Constant(0.0)
+        else:
+            self.mean_function = dl.Constant(mean)
+
+    def __call__(self,u,m,p):
+        d = u.geometric_dimension()
+        Id = dl.Identity(d)
+        F = Id + dl.grad(u)
+        C = F.T*F
+
+        # Lame parameters
+        E = dl.exp(m) + dl.Constant(1.0)
+        nu = 0.4
+        mu = E/(2.0*(1.0 + nu))
+        lmbda = (E*nu)/((1.0+nu)*(1.0 - 2.0*nu))
+
+        # Invariants of the deformation tensors
+        Ic, J = dl.tr(C), dl.det(F)
+
+        # Stored strain energy density
+        psi = (mu/2.0)*(Ic - 3.0) - mu*dl.ln(J) + (lmbda/2.0)*(dl.ln(J))**2
+
+        # Total potential energy:
+        Pi = psi*dl.dx + dl.dot(self.body_force,u)*dl.dx + dl.dot(self.traction,u)*self.my_ds(1)
+
+        return dl.derivative(Pi,u,p)
+
+
+sys.path.append( os.environ.get('HIPPYFLOW_PATH'))
+from hippyflow import LinearStateObservable, StateSpaceIdentityOperator
+
+
+
+def hyperelasticity_state_observable(mesh,output_folder ='hyperelasticity_setup/',verbose = False,seed = 0):
+
+    ########################################################################
+    #####Set up the mesh and finite element spaces#########################
+
+    Vh2 = dl.VectorFunctionSpace(mesh, 'Lagrange', 1)
+    Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1)
+    Vh = [Vh2, Vh1, Vh2]
+    print( "Number of dofs: STATE={0}, PARAMETER={1}, ADJOINT={2}".format(
+        Vh[hp.STATE].dim(), Vh[hp.PARAMETER].dim(), Vh[hp.ADJOINT].dim()) )
+
+    def left_boundary(x, on_boundary):
+        return on_boundary and ( x[0] < dl.DOLFIN_EPS)
+
+    def right_boundary(x, on_boundary):
+        return on_boundary and (x[0] > 1.0 - dl.DOLFIN_EPS)
+
+    u_left = dl.Constant((0.0,0.0))
+
+    # u_bdr = dl.Expression("x[1]", degree=1)
+    u_bdr0 = dl.Constant((0.0,0.0))
+    bc = dl.DirichletBC(Vh[hp.STATE], u_left, left_boundary)
+    bc0 = dl.DirichletBC(Vh[hp.STATE], u_bdr0, left_boundary)
+
+    # Traction boundary conditions
+
+    boundary_subdomains = dl.MeshFunction("size_t",mesh,mesh.topology().dim()-1)
+    boundary_subdomains.set_all(0)
+
+    dl.AutoSubDomain(right_boundary).mark(boundary_subdomains,1)
+
+    my_ds = dl.ds(subdomain_data = boundary_subdomains)
+
+    right_traction_expr = dl.Expression(("a*exp(-1.0*pow(x[1] - 0.5,2)/b)", "c*(1.0 + (x[1]/d))"), a=0.06, b=4, c=0.03, d=10,degree=5)
+    right_t = dl.interpolate(right_traction_expr,Vh[hp.STATE])
+
+    pde_varf = hyperelasticity_varf(Vh,my_ds,right_t)
+
+    pde = CustomNonlinearPDEProblem(Vh, pde_varf, bc, bc0, is_fwd_linear=False)
+
+    u_trial = dl.TrialFunction(Vh[hp.STATE])
+    u_test = dl.TestFunction(Vh[hp.STATE])
+
+    M = dl.PETScMatrix()
+    dl.assemble(dl.inner(u_trial,u_test)*dl.dx, tensor=M)
+
+    B = StateSpaceIdentityOperator(M)
+    # B = StateSpaceIdentityOperator()
+
+    observable = LinearStateObservable(pde,B)
+
+    return observable
+
+
+def hyperelasticity_pointwise_observable(mesh,output_folder ='hyperelasticity_setup/',verbose = False,seed = 0,\
+    nx_targets = 10,ny_targets = 10):
+
+    ########################################################################
+    #####Set up the mesh and finite
element spaces######################### + + Vh2 = dl.VectorFunctionSpace(mesh, 'Lagrange', 1) + Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1) + Vh = [Vh2, Vh1, Vh2] + print( "Number of dofs: STATE={0}, PARAMETER={1}, ADJOINT={2}".format( + Vh[hp.STATE].dim(), Vh[hp.PARAMETER].dim(), Vh[hp.ADJOINT].dim()) ) + + def left_boundary(x, on_boundary): + return on_boundary and ( x[0] < dl.DOLFIN_EPS) + + def right_boundary(x, on_boundary): + return on_boundary and (x[0] > 1.0 - dl.DOLFIN_EPS) + + u_left = dl.Constant((0.0,0.0)) + + # u_bdr = dl.Expression("x[1]", degree=1) + u_bdr0 = dl.Constant((0.0,0.0)) + bc = dl.DirichletBC(Vh[hp.STATE], u_left, left_boundary) + bc0 = dl.DirichletBC(Vh[hp.STATE], u_bdr0, left_boundary) + + # Traction boundary conditions + + boundary_subdomains = dl.MeshFunction("size_t",mesh,mesh.topology().dim()-1) + boundary_subdomains.set_all(0) + + dl.AutoSubDomain(right_boundary).mark(boundary_subdomains,1) + + my_ds = dl.ds(subdomain_data = boundary_subdomains) + + right_traction_expr = dl.Expression(("a*exp(-1.0*pow(x[1] - 0.5,2)/b)", "c*(1.0 + (x[1]/d))"), a=0.06, b=4, c=0.03, d=10,degree=5) + right_t = dl.interpolate(right_traction_expr,Vh[hp.STATE]) + + pde_varf = hyperelasticity_varf(Vh,my_ds,right_t) + + pde = CustomNonlinearPDEProblem(Vh, pde_varf, bc, bc0, is_fwd_linear=False) + + x_targets = np.linspace(0.1,0.9,nx_targets) + y_targets = np.linspace(0.1,0.9,ny_targets) + targets = [] + for xi in x_targets: + for yi in y_targets: + targets.append((xi,yi)) + targets = np.array(targets) + ntargets = targets.shape[0] + + if verbose: + print( "Number of observation points: {0}".format(ntargets) ) + + B = hp.assemblePointwiseObservation(Vh[hp.STATE], targets) + + # u_trial = dl.TrialFunction(Vh[hp.STATE]) + # u_test = dl.TestFunction(Vh[hp.STATE]) + + # M = dl.PETScMatrix() + # dl.assemble(dl.inner(u_trial,u_test)*dl.dx, tensor=M) + + # B = StateSpaceIdentityOperator(M) + # B = StateSpaceIdentityOperator() + + observable = LinearStateObservable(pde,B) + + return observable + +class CustomNonlinearPDEProblem(hp.PDEVariationalProblem): + + def solveFwd(self,state,x): + u = hp.vector2Function(x[hp.STATE],self.Vh[hp.STATE]) + m = hp.vector2Function(x[hp.PARAMETER],self.Vh[hp.PARAMETER]) + p = dl.TestFunction(self.Vh[hp.ADJOINT]) + + F = self.varf_handler(u,m,p) + du = dl.TrialFunction(self.Vh[hp.STATE]) + JF = dl.derivative(F, u, du) + + if True: + # Assert that we are in a serial instance + assert self.Vh[hp.STATE].mesh().mpi_comm().size == 1, print('Only worked out for serial codes') + # Parameters for the problem + tolerance = 1e-4 + max_iterations = 200 + # Line search + line_search = True + max_backtrack = 10 + alpha0 = 1.0 + # Printing + verbose = False + + + # Initialize while loop variables + converged = False + iteration = 0 + delta_u = self.generate_state() + delta_u.zero() + while not converged: + iteration += 1 + # from scipy.sparse import csc_matrix, csr_matrix + # from scipy.sparse import linalg as spla + import scipy.sparse as sp + import scipy.sparse.linalg as spla + # Assemble A and enforce BCS + A_dl,b_dl = dl.assemble_system(JF,F,bcs =self.bc0) # bc0 is the state variable bc + residual_norm = dl.norm(b_dl) + if verbose: + print('At iteration ',iteration,'the residual norm = ',residual_norm) + converged = (residual_norm < tolerance) + + A_mat = dl.as_backend_type(A_dl).mat() + row,col,val = A_mat.getValuesCSR() # I think they only give the csr, so we convert + A_csc = sp.csr_matrix((val,col,row)).tocsc() + t0 = time.time() + A_lu = spla.splu(A_csc) + if 
verbose: + print('Sparse LU build took ',time.time() - t0,'s for iteration',iteration) + t1 = time.time() + delta_u.set_local(A_lu.solve(-b_dl.get_local())) + if verbose: + print('Sparse LU solve took',time.time() - t1,'s') + + + if line_search: + alpha = alpha0 + + backtrack_iteration = 0 + searching = True + while searching: + backtrack_iteration += 1 + u.vector().axpy(alpha,delta_u) + res_new = dl.assemble(F) + for bc in self.bc0: + bc.apply(res_new) + # This is not sufficient descent, just descent, possibly problematic + if dl.norm(res_new) < residual_norm: + searching = False + else: + if verbose: + print('Need to take a smaller step') + u.vector().axpy(-alpha,delta_u) + alpha *=0.5 + if backtrack_iteration > max_backtrack: + break + + else: + u.vector().axpy(1.,delta_u) + + if iteration > max_iterations: + print('Maximum iterations for nonlinear PDE solve reached, moving on.') + print('Final residual norm = ',residual_norm) + break + + # Meets the interface condition for `solveFwd` + state.zero() + state.axpy(1.,u.vector()) + + + else: + F = self.varf_handler(u,m,p) + du = dl.TrialFunction(self.Vh[hp.STATE]) + JF = dl.derivative(F, u, du) + problem = dl.NonlinearVariationalProblem(F,u,self.bc,JF) + solver = dl.NonlinearVariationalSolver(problem) + + prm = solver.parameters + # print('newton solver parameters = ',solver.parameters['newton_solver'].keys()) + if False: + prm['newton_solver']['absolute_tolerance'] = 1E-4 + prm['newton_solver']['report'] = True + prm['newton_solver']['relative_tolerance'] = 1E-3 + prm['newton_solver']['maximum_iterations'] = 200 + prm['newton_solver']['relaxation_parameter'] = 1.0 + # print(dl.info(solver.parameters, True)) + if True: + prm['nonlinear_solver']='snes' + prm['snes_solver']['line_search'] = 'basic' + prm['snes_solver']['linear_solver']= 'lu' + prm['snes_solver']['report'] = False + prm['snes_solver']['error_on_nonconvergence'] = True + prm['snes_solver']['absolute_tolerance'] = 1E-5 + prm['snes_solver']['relative_tolerance'] = 1E-5 + prm['snes_solver']['maximum_iterations'] = 50 + prm['newton_solver']['absolute_tolerance'] = 1E-3 + prm['newton_solver']['relative_tolerance'] = 1E-2 + prm['newton_solver']['maximum_iterations'] = 200 + prm['newton_solver']['relaxation_parameter'] = 1.0 + + # print(dl.info(solver.parameters, True)) + iterations, converged = solver.solve() + + state.zero() + state.axpy(1.,u.vector()) + +def hyperelasticity_model(settings = hyperelasticity_problem_settings()): + # Set up the mesh, finite element spaces, and PDE forward problem + ndim = settings['ndim'] + nx = settings['nx'] + ny = settings['ny'] + mesh = dl.UnitSquareMesh(nx, ny) + Vh2 = dl.VectorFunctionSpace(mesh, 'Lagrange', 1) + Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1) + Vh = [Vh2, Vh1, Vh2] + print( "Number of dofs: STATE={0}, PARAMETER={1}, ADJOINT={2}".format( + Vh[hp.STATE].dim(), Vh[hp.PARAMETER].dim(), Vh[hp.ADJOINT].dim()) ) + + # Now for the PDE formulation + + # Dirichlet boundary conditions + + def left_boundary(x, on_boundary): + return on_boundary and ( x[0] < dl.DOLFIN_EPS) + + def right_boundary(x, on_boundary): + return on_boundary and (x[0] > 1.0 - dl.DOLFIN_EPS) + + u_left = dl.Constant((0.0,0.0)) + + # u_bdr = dl.Expression("x[1]", degree=1) + u_bdr0 = dl.Constant((0.0,0.0)) + bc = dl.DirichletBC(Vh[hp.STATE], u_left, left_boundary) + bc0 = dl.DirichletBC(Vh[hp.STATE], u_bdr0, left_boundary) + + # Traction boundary conditions + + boundary_subdomains = dl.MeshFunction("size_t",mesh,mesh.topology().dim()-1) + boundary_subdomains.set_all(0) + 
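+    # Mark the right edge as subdomain 1; my_ds(1) then restricts boundary
+    # integration to that edge, which is where hyperelasticity_varf applies
+    # the traction term.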
+ dl.AutoSubDomain(right_boundary).mark(boundary_subdomains,1) + + my_ds = dl.ds(subdomain_data = boundary_subdomains) + + right_traction_expr = dl.Expression(("a*exp(-1.0*pow(x[1] - 0.5,2)/b)", "c*(1.0 + (x[1]/d))"),\ + a=0.06, b=4, c=0.03, d=10, degree=5) + right_t = dl.interpolate(right_traction_expr,Vh[hp.STATE]) + + pde_varf = hyperelasticity_varf(Vh,my_ds,right_t) + + pde = CustomNonlinearPDEProblem(Vh, pde_varf, bc, bc0, is_fwd_linear=False) + + # Set up the prior + + correlation = settings['correlation_length'] + mean_function = dl.project(dl.Constant(0.37), Vh[hp.PARAMETER]) + prior = HyperelasticityPrior(Vh[hp.PARAMETER], correlation, mean = mean_function.vector()) + + + # Set up the observation and misfits + nx_targets = settings['nx_targets'] + ny_targets = settings['ny_targets'] + ntargets = nx_targets*ny_targets + assert ntargets == settings['ntargets'], 'Inconsistent target dimensions in settings' + + #Targets only on the bottom + x_targets = np.linspace(0.1,0.9,nx_targets) + y_targets = np.linspace(0.1,0.9,ny_targets) + targets = [] + for xi in x_targets: + for yi in y_targets: + targets.append((xi,yi)) + targets = np.array(targets) + + print( "Number of observation points: {0}".format(ntargets) ) + + misfit = hp.PointwiseStateObservation(Vh[hp.STATE], targets) + + model = hp.Model(pde, prior, misfit) + model.targets = targets + + return model + +def hyperelasticity_model_wrapper(settings = hyperelasticity_problem_settings()): + model = hyperelasticity_model(settings = settings) + + mw_settings = hf.hippylibModelWrapperSettings() + mw_settings['rel_noise'] = settings['rel_noise'] + mw_settings['seed'] = settings['seed'] + + modelwrapper = hf.hippylibModelWrapper(model,settings = mw_settings) + if hasattr(model,'targets'): + modelwrapper.targets = model.targets + # Setup the inverse problem + # modelwrapper.setUpInverseProblem() + + return modelwrapper + diff --git a/applications/hyperelasticity/hyperelasticityProblemSetup.py b/applications/hyperelasticity/hyperelasticityProblemSetup.py new file mode 100644 index 0000000..fdac6f5 --- /dev/null +++ b/applications/hyperelasticity/hyperelasticityProblemSetup.py @@ -0,0 +1,267 @@ +# This file is part of the dino package +# +# dino is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 2 of the License, or any later version. +# +# dino is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# If not, see . 
+#
+# Author: Tom O'Leary-Roseberry
+# Contact: tom.olearyroseberry@utexas.edu
+
+
+import dolfin as dl
+dl.set_log_active(False)
+import numpy as np
+from mpi4py import MPI
+import time
+import pickle
+import argparse
+from pympler import asizeof
+
+
+import sys, os
+sys.path.append( os.environ.get('HIPPYLIB_PATH'))
+import hippylib as hp
+sys.path.append( os.environ.get('HIPPYFLOW_PATH'))
+from hippyflow import *
+
+# from hyperelasticityModelSettings import *
+from hyperelasticityModelUtilities import *
+
+def save_logger(logger,filename = 'error_data.pkl'):
+    with open(output_directory+filename, 'wb+') as f:
+        pickle.dump(logger, f, pickle.HIGHEST_PROTOCOL)
+
+
+parser = argparse.ArgumentParser(description='Set up the hyperelasticity problem and generate data and projectors.')
+parser.add_argument('-ninstance',dest = 'ninstance',required= False,default = 1,help='number of instances',type = int)
+parser.add_argument('-nsubdomain',dest = 'nsubdomain',required= False,default = 1,help='number of mesh partitions',type = int)
+parser.add_argument('-sample_per',dest = 'sample_per',required= False,default = 4*64,help='number of samples per instance',type = int)
+parser.add_argument('-data_per_process',dest = 'data_per_process',required= False,default = 4*2048,help='number of data generated per process',type = int)
+parser.add_argument('-as_rank',dest = 'as_rank',required= False,default = 500,help='rank for active subspace projectors',type = int)
+parser.add_argument('-jacobian_rank',dest = 'jacobian_rank',required= False,default = 100,help='fixed local rank for jacobians',type = int)
+parser.add_argument('-pod_rank',dest = 'pod_rank',required= False,default = 500,help='rank for POD projectors',type = int)
+parser.add_argument('-nx_targets',dest = 'nx_targets',required= False,default = 10,help='nx targets for observable',type = int)
+parser.add_argument('-ny_targets',dest = 'ny_targets',required= False,default = 5,help='ny targets for observable',type = int)
+parser.add_argument('-nx',dest = 'nx',required= False,default = 64,help='number of mesh cells in x direction',type = int)
+parser.add_argument('-ny',dest = 'ny',required= False,default = 64,help='number of mesh cells in y direction',type = int)
+parser.add_argument('-correlation_length',dest = 'correlation_length',required=False,default = 0.3, help="correlation length for the Matern prior",type=float)
+parser.add_argument('-formulation',dest = 'formulation',required=False,default = 'hyperelasticity', help="formulation name string",type=str)
+parser.add_argument('-save_data',dest = 'save_data',\
+                    required= False,default = 0,help='boolean for saving of data',type = int)
+parser.add_argument('-save_jacobian_data',dest = 'save_jacobian_data',\
+                    required= False,default = 1,help='boolean for saving of Jacobian data',type = int)
+parser.add_argument('-save_pod',dest = 'save_pod',\
+                    required= False,default = 0,help='boolean for saving of POD projectors',type = int)
+parser.add_argument('-save_as',dest = 'save_as',\
+                    required= False,default = 1,help='boolean for saving of active subspace projectors',type = int)
+parser.add_argument('-save_kle',dest = 'save_kle',\
+                    required= False,default = 1,help='boolean for saving of KLE projectors',type = int)
+parser.add_argument('-save_two_states',dest = 'save_two_states',\
+                    required= False,default = 0,help='boolean for saving solution at mean and draw',type = int)
+parser.add_argument('-save_errors',dest = 'save_errors',\
+                    required= False,default = 0,help='boolean for saving error study data',type = int)
+
+
+args = parser.parse_args()
+
+# Check parallelism settings
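+# Illustrative note (assumption, not from the source): the assertion below requires
+# world_size == nsubdomain*ninstance; e.g., generateHyperelasticity.py launches
+# 'mpirun -n 1 python hyperelasticityProblemSetup.py -ninstance 1 ...' with the
+# default -nsubdomain 1, so a single process satisfies 1 == 1*1.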
+world = MPI.COMM_WORLD +world_size = world.size +assert world_size == args.nsubdomain*args.ninstance +my_rank = world.rank +if my_rank == 0: + print(('World size is '+str(world_size)).center(80)) + + +# Instantiate split communicators and collectives +mesh_constructor_comm, collective_comm = splitCommunicators(world,args.nsubdomain,args.ninstance) + +my_collective = MultipleSamePartitioningPDEsCollective(collective_comm) + +# Initialize directories for saving data +n_targets = int(args.ny_targets*args.nx_targets) +output_directory = 'data/'+args.formulation+'_nobs_'+str(n_targets)+'_correlation_length'+str(args.correlation_length)+'_nx'+str(args.nx)+'/' +os.makedirs(output_directory,exist_ok = True) +save_states_dir = output_directory+'save_states/' + +# Instantiate hyperelasticity linear observable +mesh = dl.UnitSquareMesh(mesh_constructor_comm, args.nx, args.ny) +observable_kwargs = {'nx_targets':args.nx_targets,'ny_targets':args.ny_targets,'output_folder':save_states_dir} +# No longer needs targets. +# observable_kwargs = {'output_folder':save_states_dir} +# observable = hyperelasticity_state_observable(mesh,**observable_kwargs) +observable = hyperelasticity_pointwise_observable(mesh,**observable_kwargs) +# Matern Covariance Prior is instantiated here so that each process can sample m. +Vh = observable.problem.Vh + +correlation = args.correlation_length +mean_function = dl.project(dl.Constant(0.37), Vh[hp.PARAMETER]) +prior = HyperelasticityPrior(Vh[hp.PARAMETER], correlation, mean = mean_function.vector()) + +metadata = {} + + +if False: + # Save mass matrix + u_trial = dl.TrialFunction(Vh[hp.STATE]) + u_test = dl.TestFunction(Vh[hp.STATE]) + M = dl.PETScMatrix() + dl.assemble(dl.inner(u_trial,u_test)*dl.dx, tensor=M) + M_numpy = M.array() + + # print(M_local.shape) + # print('dir(M) = ',dir(M)) + + print('Total entries M = ', np.prod(M_numpy.shape)) + print('Total nonzeros M = ',np.count_nonzero(M_numpy)) + np.save(output_directory+'mass_matrix.npy',M_numpy) + + # Save stiffness matrix + K = dl.PETScMatrix() + dl.assemble(dl.inner(dl.grad(u_trial),dl.grad(u_test))*dl.dx, tensor=K) + K_numpy = K.array() + + print('Total entries K = ', np.prod(K_numpy.shape)) + print('Total nonzeros K = ',np.count_nonzero(K_numpy)) + np.save(output_directory+'stiffness_matrix.npy',K_numpy) + + + + +# Build Active Subspace +if args.save_as or args.save_jacobian_data: + AS_parameters = ActiveSubspaceParameterList() + AS_parameters['observable_constructor'] = hyperelasticity_state_observable + AS_parameters['observable_kwargs'] = observable_kwargs + AS_parameters['output_directory'] = output_directory + AS_parameters['samples_per_process'] = args.sample_per + AS_parameters['jacobian_data_per_process'] = args.data_per_process + AS_parameters['jacobian_rank'] = args.jacobian_rank + AS_parameters['plot_label_suffix'] = r'correlation length = '+str(args.correlation_length) + AS_parameters['rank'] = args.as_rank + AS = ActiveSubspaceProjector(observable,prior, mesh_constructor_comm = mesh_constructor_comm,collective = my_collective,parameters = AS_parameters) + +# Construct input and output subspaces +if args.save_as: + AS.construct_input_subspace() + AS.construct_output_subspace() + + metadata['as_input_time'] = AS._input_subspace_construction_time + metadata['as_output_time'] = AS._output_subspace_construction_time + +# Karhunen-Lo\`{e}ve Expansion +if args.save_kle: + + KLE_parameters = KLEParameterList() + KLE_parameters['output_directory'] = output_directory + KLE_parameters['plot_label_suffix'] = 
r'correlation length = '+str(args.correlation_length) + KLE = KLEProjector(prior,\ + mesh_constructor_comm = mesh_constructor_comm,collective = my_collective,parameters = KLE_parameters) + KLE.construct_input_subspace() + + metadata['kle_time'] = KLE._subspace_construction_time + + + +# Proper Orthogonal Decomposition +if args.save_data or args.save_pod or args.save_two_states: + print('POD object being created') + POD_parameters = PODParameterList() + POD_parameters['data_per_process'] = args.data_per_process + POD_parameters['output_directory'] = output_directory + POD_parameters['plot_label_suffix'] = r'correlation length = '+str(args.correlation_length) + POD = PODProjector(observable,prior,\ + mesh_constructor_comm = mesh_constructor_comm,collective = my_collective,parameters = POD_parameters) + +# Save the POD projector +if args.save_pod: + print(80*'#') + print('Building POD projector') + POD.parameters['rank'] = args.pod_rank + POD.construct_subspace() + + metadata['pod_time'] = POD._subspace_construction_time + + + +# Test Errors +if args.save_errors: + # Error Data + error_data = {} + input_ranks = [8,16,32,64,128] + error_data['input_ranks'] = input_ranks + + rank_pairs = [(1,1),(2,2),(4,4)] + [(8*i,8*i) for i in range(1,17)] + + error_data['rank_pairs'] = rank_pairs + + if args.save_as: + print(80*'#') + print('Testing error for AS input bound'.center(80)) + + error_data['AS_input_errors'], error_data['AS_output_errors'] = AS.test_errors(ranks = input_ranks) + if args.save_pod: + print(80*'#') + print('Testing error for AS input-output error bound'.center(80)) + error_data['as_input_output'] = POD.input_output_error_test(AS.V_GN,Cinv = AS.prior.R, rank_pairs = rank_pairs) + + if args.save_kle: + print(80*'#') + print('Testing error for KLE input-error bound'.center(80)) + error_data['KLE_input_errors'] = KLE.test_errors(ranks = input_ranks) + if args.save_pod: + print(80*'#') + print('Testing error for KLE input-output error bound'.center(80)) + error_data['kle_input_output'] = POD.input_output_error_test(KLE.V_KLE,Cinv = AS.prior.M, rank_pairs = rank_pairs) + + if True and args.save_pod: + print(80*'#') + print('Testing error for random input-output error bound'.center(80)) + Omega = KLE.random_input_projector() + error_data['random_input_output'] = POD.input_output_error_test(Omega,Cinv = AS.prior.M, rank_pairs = rank_pairs) + + save_logger(error_data) + + + +if args.save_data: + print(80*'#') + print('Made it to the POD data generation!') + POD.generate_training_data(output_directory) + + metadata['data_time'] = POD._data_generation_time + + +if args.save_jacobian_data: + print(80*'#') + print('Made it to the Jacobian data generation!') + AS.construct_low_rank_Jacobians(output_directory+'jacobian_data/') + metadata['jacobian_data_time'] = AS._jacobian_data_generation_time + + +if args.save_two_states: + print(80*'#') + print('Saving two states') + POD.two_state_solution() + + +if True: + save_logger(metadata,filename='metadata.pkl') + + + + + + + + + diff --git a/applications/poisson/dino_paper/dino_training.py b/applications/poisson/dino_paper/dino_training.py index 828abb0..5c08156 100644 --- a/applications/poisson/dino_paper/dino_training.py +++ b/applications/poisson/dino_paper/dino_training.py @@ -59,12 +59,7 @@ parser.add_argument("-batch_rank", dest='batch_rank',required=False, default = 50, help="batch rank parameter used in sketching of Jacobian information",type=int) parser.add_argument("-l2_weight", dest='l2_weight',required=False, default = 1., help="weight for 
l2 term",type=float) parser.add_argument("-h1_weight", dest='h1_weight',required=False, default = 1., help="weight for h1 term",type=float) -parser.add_argument("-left_weight", dest='left_weight',required=False, default = 1., help="weight for left optimization constraint",type=float) -parser.add_argument("-right_weight", dest='right_weight',required=False, default = 1., help="weight for right optimization constraint",type=float) -# Constraint sketching -parser.add_argument("-constraint_sketching", dest='constraint_sketching',required=False, default = 0, help="to constraint sketch or not",type=int) -parser.add_argument("-output_sketch_dim", dest='output_sketch_dim',required=False, default = 50, help="output sketching rank",type=int) -parser.add_argument("-input_sketch_dim", dest='input_sketch_dim',required=False, default = 50, help="input sketching rank",type=int) + # Full J training parser.add_argument("-train_full_jacobian", dest='train_full_jacobian',required=False, default = 1, help="full J",type=int) diff --git a/applications/rdiff/dino_paper/dino_training.py b/applications/rdiff/dino_paper/dino_training.py index e22cbb4..e03dc12 100644 --- a/applications/rdiff/dino_paper/dino_training.py +++ b/applications/rdiff/dino_paper/dino_training.py @@ -59,12 +59,7 @@ parser.add_argument("-batch_rank", dest='batch_rank',required=False, default = 50, help="batch rank parameter used in sketching of Jacobian information",type=int) parser.add_argument("-l2_weight", dest='l2_weight',required=False, default = 1., help="weight for l2 term",type=float) parser.add_argument("-h1_weight", dest='h1_weight',required=False, default = 1., help="weight for h1 term",type=float) -parser.add_argument("-left_weight", dest='left_weight',required=False, default = 1., help="weight for left optimization constraint",type=float) -parser.add_argument("-right_weight", dest='right_weight',required=False, default = 1., help="weight for right optimization constraint",type=float) -# Constraint sketching -parser.add_argument("-constraint_sketching", dest='constraint_sketching',required=False, default = 0, help="to constraint sketch or not",type=int) -parser.add_argument("-output_sketch_dim", dest='output_sketch_dim',required=False, default = 50, help="output sketching rank",type=int) -parser.add_argument("-input_sketch_dim", dest='input_sketch_dim',required=False, default = 50, help="input sketching rank",type=int) + # Full J training parser.add_argument("-train_full_jacobian", dest='train_full_jacobian',required=False, default = 1, help="full J",type=int) diff --git a/dino/surrogate_construction/dataUtilities.py b/dino/surrogate_construction/dataUtilities.py index bf8dff8..e71d2f9 100644 --- a/dino/surrogate_construction/dataUtilities.py +++ b/dino/surrogate_construction/dataUtilities.py @@ -27,34 +27,34 @@ def load_data(data_dir,rescale = False,derivatives = False, n_data = np.inf): q_files = [] mq_files = [] for file in data_files: - if 'ms_on_rank_' in file: + if 'ms_on_proc_' in file: m_files.append(file) - if 'qs_on_rank_' in file: + if 'qs_on_proc_' in file: q_files.append(file) - if 'mq_on_rank' in file: + if 'mq_on_proc' in file: mq_files.append(file) if len(mq_files) == 0: - ranks = [int(file.split(data_dir+'ms_on_rank_')[-1].split('.npy')[0]) for file in m_files] + ranks = [int(file.split(data_dir+'ms_on_proc_')[-1].split('.npy')[0]) for file in m_files] else: - ranks = [int(file.split(data_dir+'mq_on_rank')[-1].split('.npz')[0]) for file in mq_files] + ranks = 
[int(file.split(data_dir+'mq_on_proc')[-1].split('.npz')[0]) for file in mq_files] max_rank = max(ranks) # Serially concatenate data if len(mq_files) == 0: - m_data = np.load(data_dir+'ms_on_rank_0.npy') - q_data = np.load(data_dir+'qs_on_rank_0.npy') + m_data = np.load(data_dir+'ms_on_proc_0.npy') + q_data = np.load(data_dir+'qs_on_proc_0.npy') for i in range(1,max_rank+1): - appendage_m = np.load(data_dir+'ms_on_rank_'+str(i)+'.npy') + appendage_m = np.load(data_dir+'ms_on_proc_'+str(i)+'.npy') m_data = np.concatenate((m_data,appendage_m)) - appendage_q = np.load(data_dir+'qs_on_rank_'+str(i)+'.npy') + appendage_q = np.load(data_dir+'qs_on_proc_'+str(i)+'.npy') q_data = np.concatenate((q_data,appendage_q)) else: - npz_data = np.load(data_dir+'mq_on_rank0.npz') + npz_data = np.load(data_dir+'mq_on_proc0.npz') m_data = npz_data['m_data'] q_data = npz_data['q_data'] for i in range(1,max_rank+1): - npz_data = np.load(data_dir+'mq_on_rank'+str(i)+'.npz') + npz_data = np.load(data_dir+'mq_on_proc'+str(i)+'.npz') appendage_m = npz_data['m_data'] appendage_q = npz_data['q_data'] m_data = np.concatenate((m_data,appendage_m)) @@ -77,40 +77,40 @@ def load_data(data_dir,rescale = False,derivatives = False, n_data = np.inf): V_files = [] J_files = [] for file in data_files: - if 'Us_on_rank_' in file: + if 'Us_on_proc_' in file: U_files.append(file) - if 'sigmas_on_rank_' in file: + if 'sigmas_on_proc_' in file: sigma_files.append(file) - if 'Vs_on_rank_' in file: + if 'Vs_on_proc_' in file: V_files.append(file) - if 'J_on_rank' in file: + if 'J_on_proc' in file: J_files.append(file) if len(J_files) == 0: - ranks = [int(file.split(data_dir+'sigmas_on_rank_')[-1].split('.npy')[0]) for file in sigma_files] + ranks = [int(file.split(data_dir+'sigmas_on_proc_')[-1].split('.npy')[0]) for file in sigma_files] else: - ranks = [int(file.split(data_dir+'J_on_rank')[-1].split('.npz')[0]) for file in J_files] + ranks = [int(file.split(data_dir+'J_on_proc')[-1].split('.npz')[0]) for file in J_files] max_rank = max(ranks) if len(J_files) == 0: # Serially concatenate derivative data - U_data = np.load(data_dir+'Us_on_rank_0.npy') - sigma_data = np.load(data_dir+'sigmas_on_rank_0.npy') - V_data = np.load(data_dir+'Vs_on_rank_0.npy') + U_data = np.load(data_dir+'Us_on_proc_0.npy') + sigma_data = np.load(data_dir+'sigmas_on_proc_0.npy') + V_data = np.load(data_dir+'Vs_on_proc_0.npy') for i in range(1,max_rank+1): - appendage_U = np.load(data_dir+'Us_on_rank_'+str(i)+'.npy') + appendage_U = np.load(data_dir+'Us_on_proc_'+str(i)+'.npy') U_data = np.concatenate((U_data,appendage_U)) - appendage_sigma = np.load(data_dir+'sigmas_on_rank_'+str(i)+'.npy') + appendage_sigma = np.load(data_dir+'sigmas_on_proc_'+str(i)+'.npy') sigma_data = np.concatenate((sigma_data,appendage_sigma)) - appendage_V = np.load(data_dir+'Vs_on_rank_'+str(i)+'.npy') + appendage_V = np.load(data_dir+'Vs_on_proc_'+str(i)+'.npy') V_data = np.concatenate((V_data,appendage_V)) else: - Jnpz_data = np.load(data_dir+'J_on_rank0.npz') + Jnpz_data = np.load(data_dir+'J_on_proc0.npz') U_data = Jnpz_data['U_data'] sigma_data = Jnpz_data['sigma_data'] V_data = Jnpz_data['V_data'] for i in range(1,max_rank+1): - Jnpz_data = np.load(data_dir+'J_on_rank'+str(i)+'.npz') + Jnpz_data = np.load(data_dir+'J_on_proc'+str(i)+'.npz') appendage_U = Jnpz_data['U_data'] appendage_sigma = Jnpz_data['sigma_data'] appendage_V = Jnpz_data['V_data'] diff --git a/dino/surrogate_construction/jacobianConstruction.py b/dino/surrogate_construction/jacobianConstruction.py index 
2ce6d3d..9adbd69 100644 --- a/dino/surrogate_construction/jacobianConstruction.py +++ b/dino/surrogate_construction/jacobianConstruction.py @@ -46,15 +46,6 @@ def jacobian_network_settings(problem_settings): jacobian_settings['batch_rank'] = 10 jacobian_settings['target_rank'] = 50 jacobian_settings['train_full_jacobian'] = False - jacobian_settings['nullspace_constraints'] = False - jacobian_settings['constraint_sketching'] = False - jacobian_settings['input_sketch_dim'] = None - jacobian_settings['output_sketch_dim'] = None - jacobian_settings['constraint_seed'] = 0 - - # Data directory - jacobian_settings['data_dir'] = None - # Neural network architecture settings jacobian_settings['architecture'] = 'as_dense' @@ -99,11 +90,13 @@ def jacobian_network_settings(problem_settings): -def jacobian_training_driver(settings,verbose = True): +def jacobian_training_driver(settings, remapped_data = None, unflattened_data = None, verbose = True): ''' ''' - n_data = settings['train_data_size'] + settings['test_data_size'] - + for loss_weight in settings['opt_parameters']['loss_weights']: + assert loss_weight >= 0 + ################################################################################ + # Set up training and testing data. if settings['data_dir'] is None: data_dir = '../data/'+settings['problem_settings']['formulation']+'_n_obs_'+str(settings['problem_settings']['ntargets'])+\ '_g'+str(settings['problem_settings']['gamma'])+'_d'+str(settings['problem_settings']['delta'])+\ @@ -111,36 +104,52 @@ def jacobian_training_driver(settings,verbose = True): else: data_dir = settings['data_dir'] - assert os.path.isdir(data_dir), 'Directory does not exist'+data_dir - for loss_weight in settings['opt_parameters']['loss_weights']: - assert loss_weight >= 0 - - all_data = load_data(data_dir,rescale = False,n_data = n_data,derivatives = True) - - input_dim = all_data['m_data'].shape[-1] - output_dim = all_data['q_data'].shape[-1] - settings['input_dim'] = input_dim - settings['output_dim'] = output_dim + if remapped_data is None: + if unflattened_data is None: + n_data = settings['train_data_size'] + settings['test_data_size'] + all_data = load_data(data_dir,rescale = False,n_data = n_data,derivatives = True) + unflattened_train_dict, unflattened_test_dict = train_test_split(all_data,n_train = settings['train_data_size']) + else: + if len(unflattened_data) == 1: + unflattened_train_dict = unflattened_data[0] + unflattened_test_dict = None + else: + unflattened_train_dict, unflattened_test_dict = unflattened_data + if settings['train_full_jacobian']: + train_dict = remap_jacobian_data(unflattened_train_dict) + if unflattened_test_dict is None: + test_dict = None + else: + test_dict = remap_jacobian_data(unflattened_test_dict) + else: + train_dict = flatten_data(unflattened_train_dict,target_rank = settings['target_rank'],batch_rank = settings['batch_rank']) + if unflattened_test_dict is None: + test_dict = None + else: + test_dict = flatten_data(unflattened_test_dict,target_rank = settings['target_rank'],batch_rank = settings['batch_rank']) + else: + assert settings['train_full_jacobian'] + if len(remapped_data) == 1: + train_dict = remapped_data[0] + test_dict = None + else: + train_dict, test_dict = remapped_data - unflattened_train_dict, unflattened_test_dict = train_test_split(all_data,n_train = settings['train_data_size']) # If these assertions fail, then need to rethink the following logic - assert len(unflattened_train_dict['m_data'].shape) == 2 - assert len(unflattened_train_dict['q_data'].shape) 
== 2 - n_train,dM = unflattened_train_dict['m_data'].shape - n_test,dQ = unflattened_test_dict['q_data'].shape - - - if settings['train_full_jacobian']: - train_dict = remap_jacobian_data(unflattened_train_dict) - test_dict = remap_jacobian_data(unflattened_test_dict) + assert len(train_dict['m_data'].shape) == 2 + assert len(train_dict['q_data'].shape) == 2 + n_train,dM = train_dict['m_data'].shape + if test_dict is not None: + n_test,dQ = test_dict['q_data'].shape else: - # Flatten training data - train_dict = flatten_data(unflattened_train_dict,target_rank = settings['target_rank'],batch_rank = settings['batch_rank']) - # Flatten testing data - test_dict = flatten_data(unflattened_test_dict,target_rank = settings['target_rank'],batch_rank = settings['batch_rank']) - + n_test = 0 + dQ = train_dict['q_data'].shape[-1] + settings['input_dim'] = dM + settings['output_dim'] = dQ + ################################################################################ + # Set up the neural networks if settings['architecture'] in ['as_resnet','as_dense']: data_dict_pod = {'input_train':train_dict['m_data'], 'output_train':train_dict['q_data']} last_layer_weights = build_POD_layer_arrays(data_dict_pod,truncation_dimension = settings['truncation_dimension'],\ @@ -156,11 +165,8 @@ def jacobian_training_driver(settings,verbose = True): projector_dict['output'] = last_layer_weights else: projector_dict = None - - ################################################################################ - regressor = choose_network(settings,projector_dict) - + ################################################################################ if settings['initial_guess_path'] is not None: assert os.path.isfile(settings['initial_guess_path']), 'Trained weights may not exist as specified: '+str(settings['initial_guess_path']) import pickle @@ -174,28 +180,10 @@ def jacobian_training_driver(settings,verbose = True): settings['opt_parameters']['train_full_jacobian'] = settings['train_full_jacobian'] regressor = equip_model_with_full_jacobian(regressor,name_prefix = settings['name_prefix']) - elif settings['nullspace_constraints']: - print('Equipping nullspace constraint Jacobian') - settings['opt_parameters']['nullspace_constraints'] = settings['nullspace_constraints'] - regressor = equip_model_with_jacobian_and_constraints(regressor,settings['batch_rank'],constraint_sketching = settings['constraint_sketching'],\ - input_sketch_dim = settings['input_sketch_dim'],output_sketch_dim = settings['output_sketch_dim'],name_prefix = settings['name_prefix']) - if settings['constraint_sketching']: - rQ = settings['output_sketch_dim'] - rM = settings['input_sketch_dim'] - zero_train = np.zeros((n_train,rQ,rM)) - zero_test = np.zeros((n_test,rQ,rM)) - train_dict['zero_matrix'] = zero_train - test_dict['zero_matrix'] = zero_test - constraint_state = np.random.RandomState(seed = settings['constraint_seed']) - else: - zero_train = np.zeros((n_train,dQ,dM)) - zero_test = np.zeros((n_test,dQ,dM)) - train_dict['zero_matrix'] = zero_train - test_dict['zero_matrix'] = zero_test else: regressor = equip_model_with_sketched_jacobian(regressor,settings['batch_rank'],name_prefix = settings['name_prefix']) - + ################################################################################ print('Commencing training'.center(80)) if settings['shuffle_every_epoch']: settings['opt_parameters']['keras_epochs'] = settings['inner_epochs'] @@ -203,30 +191,6 @@ def jacobian_training_driver(settings,verbose = True): if verbose: print(('Running inner 
iteration '+str(epoch)).center(80)) train_dict = flatten_data(unflattened_train_dict,target_rank = settings['target_rank'],batch_rank = settings['batch_rank'],order_random = True,burn_in = epoch) - if settings['nullspace_constraints']: - # Need to add the zero data here to train_dict - if settings['constraint_sketching']: - train_dict['zero_matrix'] = zero_train - test_dict['zero_matrix'] = zero_test - rQ = settings['output_sketch_dim'] - rM = settings['input_sketch_dim'] - # Could instead have many qrs at once - output_sketch,_ = np.linalg.qr(constraint_state.randn(dQ,rQ)) - output_sketch_train = np.tile(output_sketch,(n_train,1,1)) - output_sketch_test = np.tile(output_sketch,(n_test,1,1)) - input_sketch,_ = np.linalg.qr(constraint_state.randn(dM,rM)) - input_sketch_train = np.tile(input_sketch,(n_train,1,1)) - input_sketch_test = np.tile(input_sketch,(n_test,1,1)) - - train_dict['left_sketch'] = output_sketch_train - train_dict['right_sketch'] = input_sketch_train - test_dict['left_sketch'] = output_sketch_test - test_dict['right_sketch'] = input_sketch_test - settings['opt_parameters']['constraint_sketching'] = True - - else: - train_dict['zero_matrix'] = zero_train - test_dict['zero_matrix'] = zero_test regressor = train_h1_network(regressor,train_dict,test_dict,opt_parameters = settings['opt_parameters'],verbose = True) pass else: @@ -336,92 +300,4 @@ def equip_model_with_full_jacobian(model,name_prefix = ''): return new_model -def equip_model_with_full_jacobian_and_right_nullspace(model,name_prefix = ''): - """ - """ - assert len(model.inputs) == 1 - assert len(model.outputs) == 1 - input_m = model.inputs[0] - output_q = model.outputs[0] - try: - input_dim = input_m.shape[-1].value - output_dim = output_q.shape[-1].value - except: - input_dim = input_m.shape[-1] - output_dim = output_q.shape[-1] - - # Assuming here that V \in \mathbb{R}^{d_M \times d_Q}, because Jacobian has full rank - # and d_Q < d_M - input_V = tf.keras.layers.Input(shape=(input_dim,output_dim),name=name_prefix+'_input_V') - - with tf.GradientTape(persistent = True) as tape: - tape.watch(input_m) - qout = model(input_m) - # Full batched Jacobian - fullJ = tape.batch_jacobian(qout,input_m) - - # Very important assumption is that V is orthonormal. 
- fullJV = tf.einsum('ijk,ikl->ijl',fullJ,input_V,name=name_prefix+'fullJV') - fullJVVT = tf.einsum('ijk,ilk->ijl',fullJV,input_V,name=name_prefix+'fullJVVT') - - new_model = tf.keras.models.Model([input_m, input_V],[output_q,fullJ, fullJVVT]) - - return new_model - - -def equip_model_with_jacobian_and_constraints(model,batch_rank,constraint_sketching = False,\ - output_sketch_dim = None,input_sketch_dim = None,name_prefix = ''): - """ - """ - assert len(model.inputs) == 1 - assert len(model.outputs) == 1 - input_m = model.inputs[0] - output_q = model.outputs[0] - try: - input_dim = input_m.shape[-1].value - output_dim = output_q.shape[-1].value - except: - input_dim = input_m.shape[-1] - output_dim = output_q.shape[-1] - - if constraint_sketching: - assert output_sketch_dim is not None - assert input_sketch_dim is not None - assert type(output_sketch_dim) is int - assert type(input_sketch_dim) is int - left_sketch = tf.keras.layers.Input(shape=(output_dim,output_sketch_dim),name=name_prefix+'_left_sketch') - right_sketch = tf.keras.layers.Input(shape=(input_dim,input_sketch_dim),name=name_prefix+'_right_sketch') - - - input_U = tf.keras.layers.Input(shape=(output_dim,batch_rank),name=name_prefix+'_input_U') - input_V = tf.keras.layers.Input(shape=(input_dim,batch_rank),name=name_prefix+'_input_V') - - with tf.GradientTape(persistent = True) as tape: - tape.watch(input_m) - qout = model(input_m) - # Full batched Jacobian - fullJ = tape.batch_jacobian(qout,input_m) - # Right condition - fullJV = tf.einsum('ijk,ikl->ijl',fullJ,input_V) - fullJVVT = tf.einsum('ijk,ilk->ijl',fullJV,input_V) - right_condition = fullJ - fullJVVT - # Left condition - UTfullJ = tf.einsum('ijk,ijl->ikl',input_U,fullJ) - UUTfullJ = tf.einsum('ijk,ikl->ijl',input_U,UTfullJ) - left_condition = fullJ - UUTfullJ - - - # Singular value condition - UTfullJV = tf.einsum('ijk,ijl->ikl',input_U,fullJV) - - if constraint_sketching: - left_condition = tf.einsum('ijk,ijl->ilk',left_condition,left_sketch) - left_condition = tf.einsum('ijk,ikl->ijl',left_condition,right_sketch) - right_condition = tf.einsum('ijk,ijl->ilk',right_condition,left_sketch) - right_condition = tf.einsum('ijk,ikl->ijl',right_condition,right_sketch) - new_model = tf.keras.models.Model([input_m,input_U,input_V,left_sketch,right_sketch], [output_q,UTfullJV,left_condition,right_condition]) - else: - new_model = tf.keras.models.Model([input_m,input_U,input_V], [output_q,UTfullJV,left_condition,right_condition]) - - return new_model diff --git a/dino/surrogate_construction/neuralNetworks.py b/dino/surrogate_construction/neuralNetworks.py index d7b7c4f..c65d584 100644 --- a/dino/surrogate_construction/neuralNetworks.py +++ b/dino/surrogate_construction/neuralNetworks.py @@ -45,8 +45,8 @@ def low_rank_layer(input_x,rank = 8,activation = 'softplus',name_prefix = None,z -def projected_resnet(input_projector,last_layer_weights,ranks = [],\ - trainable = False, name_prefix = ''): +def projected_resnet(input_projector,last_layer_weights,ranks = [],compat_layer = False,\ + first_layer_trainable = False, last_layer_trainable = True, name_prefix = ''): """ """ input_dim,reduced_input_dim = input_projector.shape @@ -78,6 +78,8 @@ def projected_resnet(input_projector,last_layer_weights,ranks = [],\ # If not handling a dimension mismatch before then do it here. 
# z = tf.keras.layers.Dense(reduced_output_dim)(z) + if compat_layer: + z = tf.keras.layers.Dense(reduced_output_dim,name = name_prefix+'output_compat_layer',use_bias = False)(z) output_layer = tf.keras.layers.Dense(output_dim,name = name_prefix+'output_layer')(z) @@ -85,10 +87,10 @@ def projected_resnet(input_projector,last_layer_weights,ranks = [],\ ######################################################################## # Modify input layer by setting weights and setting trainable boolean - regressor.get_layer(name_prefix+'input_proj_layer').trainable = trainable + regressor.get_layer(name_prefix+'input_proj_layer').trainable = first_layer_trainable regressor.get_layer(name_prefix+'input_proj_layer').set_weights([input_projector]) - regressor.get_layer(name_prefix + 'output_layer').trainable = True + regressor.get_layer(name_prefix + 'output_layer').trainable = last_layer_trainable regressor.get_layer(name_prefix + 'output_layer').set_weights(last_layer_weights) return regressor @@ -97,7 +99,7 @@ def projected_resnet(input_projector,last_layer_weights,ranks = [],\ def projected_dense(input_projector,last_layer_weights,hidden_layer_dimensions = [],\ - trainable = False, name_prefix = ''): + compat_layer = False, first_layer_trainable = False, last_layer_trainable = True, name_prefix = ''): """ """ input_dim,reduced_input_dim = input_projector.shape @@ -116,14 +118,18 @@ def projected_dense(input_projector,last_layer_weights,hidden_layer_dimensions = z = tf.keras.layers.Dense(reduced_input_dim,activation = 'softplus',name=name_prefix + 'dense_reduction_layer')(input_proj_layer) for i,hidden_layer_dimension in enumerate(hidden_layer_dimensions): z = tf.keras.layers.Dense(hidden_layer_dimension,activation = 'softplus',name = name_prefix+'inner_layer_'+str(i))(z) + + if compat_layer: + z = tf.keras.layers.Dense(reduced_output_dim,name = name_prefix+'output_compat_layer',use_bias = False)(z) + output_layer = tf.keras.layers.Dense(output_dim,name = name_prefix + 'output_layer')(z) regressor = tf.keras.models.Model(input_data,output_layer,name = 'output_proj_layer') - regressor.get_layer(name_prefix+'input_proj_layer').trainable = trainable + regressor.get_layer(name_prefix+'input_proj_layer').trainable = first_layer_trainable regressor.get_layer(name_prefix+'input_proj_layer').set_weights([input_projector]) - regressor.get_layer(name_prefix + 'output_layer').trainable = True + regressor.get_layer(name_prefix + 'output_layer').trainable = last_layer_trainable regressor.get_layer(name_prefix + 'output_layer').set_weights(last_layer_weights) return regressor diff --git a/dino/surrogate_construction/trainingUtilities.py b/dino/surrogate_construction/trainingUtilities.py index 5213bb0..0b7ef2c 100644 --- a/dino/surrogate_construction/trainingUtilities.py +++ b/dino/surrogate_construction/trainingUtilities.py @@ -67,10 +67,7 @@ def network_training_parameters(): opt_parameters = {} # How to weight the least squares losses [l2,h1 seminorm] opt_parameters['loss_weights'] = [1.0,1.0] - opt_parameters['nullspace_constraints'] = False - - opt_parameters['constraint_sketching'] = False - opt_parameters['train_full_jacobian'] = False + opt_parameters['train_full_jacobian'] = True # Keras training parameters opt_parameters['train_keras'] = True @@ -93,7 +90,7 @@ def network_training_parameters(): return opt_parameters -def train_h1_network(network,train_dict,test_dict,opt_parameters = network_training_parameters(),verbose = True): +def train_h1_network(network,train_dict,test_dict = None,opt_parameters = 
diff --git a/dino/surrogate_construction/trainingUtilities.py b/dino/surrogate_construction/trainingUtilities.py
index 5213bb0..0b7ef2c 100644
--- a/dino/surrogate_construction/trainingUtilities.py
+++ b/dino/surrogate_construction/trainingUtilities.py
@@ -67,10 +67,7 @@ def network_training_parameters():
     opt_parameters = {}
     # How to weight the least squares losses [l2,h1 seminorm]
     opt_parameters['loss_weights'] = [1.0,1.0]
-    opt_parameters['nullspace_constraints'] = False
-
-    opt_parameters['constraint_sketching'] = False
-    opt_parameters['train_full_jacobian'] = False
+    opt_parameters['train_full_jacobian'] = True
 
     # Keras training parameters
     opt_parameters['train_keras'] = True
@@ -93,7 +90,7 @@ def network_training_parameters():
 
     return opt_parameters
 
-def train_h1_network(network,train_dict,test_dict,opt_parameters = network_training_parameters(),verbose = True):
+def train_h1_network(network,train_dict,test_dict = None,opt_parameters = network_training_parameters(),verbose = True):
     if opt_parameters['keras_opt'] == 'adam':
         optimizer = tf.keras.optimizers.Adam(learning_rate = opt_parameters['keras_alpha'])
     elif opt_parameters['keras_opt'] == 'sgd':
@@ -104,12 +101,7 @@ def train_h1_network(network,train_dict,test_dict,opt_parameters = network_train
 
     assert len(opt_parameters['loss_weights']) == len(network.outputs)
 
-    if opt_parameters['nullspace_constraints']:
-        losses = [mol_normalized_mse]*len(network.outputs)
-        losses[0] = normalized_mse
-        losses[1] = normalized_mse
-        metrics = [mol_l2_accuracy]
-    elif opt_parameters['train_full_jacobian']:
+    if opt_parameters['train_full_jacobian']:
         assert len(network.outputs) == 2
         losses = [normalized_mse]+[normalized_mse_matrix]
         metrics = [l2_accuracy]+[f_accuracy_matrix]
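With the nullspace-constraint branch gone, the two-output full-Jacobian path pairs normalized_mse on the observable with normalized_mse_matrix on the Jacobian, and reports l2_accuracy and f_accuracy_matrix. Those helpers are defined elsewhere in dino and are not shown in this patch; the following is only a plausible reading of the matrix variants, assuming per-sample Frobenius-norm normalization (the package's exact normalization may differ).

import tensorflow as tf

def normalized_mse_matrix_sketch(J_true, J_pred):
    # Squared Frobenius misfit, normalized by the true Jacobian's norm.
    num = tf.reduce_sum(tf.square(J_true - J_pred), axis=[1, 2])
    den = tf.reduce_sum(tf.square(J_true), axis=[1, 2])
    return tf.reduce_mean(num / den)

def f_accuracy_matrix_sketch(J_true, J_pred):
    # "Accuracy" as 1 - relative Frobenius error, averaged over the batch.
    num = tf.sqrt(tf.reduce_sum(tf.square(J_true - J_pred), axis=[1, 2]))
    den = tf.sqrt(tf.reduce_sum(tf.square(J_true), axis=[1, 2]))
    return 1.0 - tf.reduce_mean(num / den)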
@@ -120,44 +112,38 @@ def train_h1_network(network,train_dict,test_dict,opt_parameters = network_train
     network.compile(optimizer=optimizer,loss=losses,loss_weights = opt_parameters['loss_weights'],metrics=metrics)
 
-    if opt_parameters['constraint_sketching']:
-        assert 'left_sketch' in train_dict.keys()
-        assert 'right_sketch' in train_dict.keys()
-        assert 'left_sketch' in test_dict.keys()
-        assert 'right_sketch' in test_dict.keys()
-        input_train = [train_dict['m_data'],train_dict['U_data'],train_dict['V_data'],train_dict['left_sketch'],train_dict['right_sketch']]
-        input_test = [test_dict['m_data'],test_dict['U_data'],test_dict['V_data'],test_dict['left_sketch'],test_dict['right_sketch']]
-        output_train = [train_dict['q_data'],train_dict['sigma_data'],train_dict['zero_matrix'],train_dict['zero_matrix']]
-        output_test = [test_dict['q_data'],test_dict['sigma_data'],test_dict['zero_matrix'],test_dict['zero_matrix']]
-    elif opt_parameters['train_full_jacobian']:
+
+    if opt_parameters['train_full_jacobian']:
         input_train = [train_dict['m_data']]
-        input_test = [test_dict['m_data']]
         output_train = [train_dict['q_data'],train_dict['J_data']]
-        output_test = [test_dict['q_data'],test_dict['J_data']]
+        if test_dict is None:
+            test_data = None
+        else:
+            input_test = [test_dict['m_data']]
+            output_test = [test_dict['q_data'],test_dict['J_data']]
+            test_data = (input_test,output_test)
     else:
         input_train = [train_dict['m_data'],train_dict['U_data'],train_dict['V_data']]
-        input_test = [test_dict['m_data'],test_dict['U_data'],test_dict['V_data']]
        output_train = [train_dict['q_data'],train_dict['sigma_data']]
-        output_test = [test_dict['q_data'],test_dict['sigma_data']]
+        if test_dict is None:
+            test_data = None
+        else:
+            input_test = [test_dict['m_data'],test_dict['U_data'],test_dict['V_data']]
+            output_test = [test_dict['q_data'],test_dict['sigma_data']]
+            test_data = (input_test,output_test)
 
-
     if verbose:
         eval_train = network.evaluate(input_train,output_train,verbose=2)
         eval_train_dict = {out: eval_train[i] for i, out in enumerate(network.metrics_names)}
-        eval_test = network.evaluate(input_test,output_test,verbose=2)
-        eval_test_dict = {out: eval_test[i] for i, out in enumerate(network.metrics_names)}
-        if opt_parameters['nullspace_constraints']:
-            print('Before training: l2, h1, left, right training accuracies = ', eval_train[5], eval_train[6],eval_train[7], eval_train[8])
-            print('Before training: l2, h1 left, right testing accuracies = ', eval_test[5], eval_test[6],eval_test[7], eval_test[8])
-        else:
-            print('Before training: l2, h1 training accuracies = ', eval_train[3], eval_train[4])
-            print('Before training: l2, h1 testing accuracies = ', eval_test[3], eval_test[4])
-        print('eval_train_dict = ',eval_train_dict)
-        print('eval_test_dict = ',eval_test_dict)
+        print('Before training: l2, h1 training accuracies = ', eval_train[3], eval_train[6])
+        if test_dict is not None:
+            eval_test = network.evaluate(input_test,output_test,verbose=2)
+            eval_test_dict = {out: eval_test[i] for i, out in enumerate(network.metrics_names)}
+            print('Before training: l2, h1 testing accuracies = ', eval_test[3], eval_test[6])
 
     if opt_parameters['train_keras']:
         network.fit(input_train,output_train,
-                        validation_data = (input_test,output_test),epochs = opt_parameters['keras_epochs'],\
+                        validation_data = test_data,epochs = opt_parameters['keras_epochs'],\
                         batch_size = opt_parameters['keras_batch_size'],verbose = opt_parameters['keras_verbose'])
 
     if opt_parameters['train_hessianlearn']:
@@ -178,9 +164,11 @@ def train_h1_network(network,train_dict,test_dict,opt_parameters = network_train
         problem = KMW.problem
         hess_train_dict = {problem.x[0]:input_train[0],problem.x[1]:input_train[1],problem.x[2]:input_train[2],\
                         problem.y_true[0]:output_train[0],problem.y_true[1]:output_train[1]}
-        hess_val_dict = {problem.x[0]:input_test[0],problem.x[1]:input_test[1],problem.x[2]:input_test[2],\
-                        problem.y_true[0]:output_test[0],problem.y_true[1]:output_test[1]}
-
+        if test_dict is not None:
+            hess_val_dict = {problem.x[0]:input_test[0],problem.x[1]:input_test[1],problem.x[2]:input_test[2],\
+                            problem.y_true[0]:output_test[0],problem.y_true[1]:output_test[1]}
+        else:
+            hess_val_dict = None
         data = hess.Data(hess_train_dict,opt_parameters['hess_gbatch_size'],\
                         validation_data = hess_val_dict,hessian_batch_size = opt_parameters['hess_batch_size'])
         # And finally one can call fit!
@@ -188,17 +176,16 @@ def train_h1_network(network,train_dict,test_dict,opt_parameters = network_train
 
     if verbose:
         eval_train = network.evaluate(input_train,output_train,verbose=2)
-        eval_test = network.evaluate(input_test,output_test,verbose=2)
-        if opt_parameters['nullspace_constraints']:
-            print('After training: l2, h1, left, right training accuracies = ', eval_train[5], eval_train[6],eval_train[7], eval_train[8])
-            print('After training: l2, h1 left, right testing accuracies = ', eval_test[5], eval_test[6],eval_test[7], eval_test[8])
-        else:
-            print('After training: l2, h1 training accuracies = ', eval_train[3], eval_train[4])
-            print('After training: l2, h1 testing accuracies = ', eval_test[3], eval_test[4])
+        eval_train_dict = {out: eval_train[i] for i, out in enumerate(network.metrics_names)}
+        print('After training: l2, h1 training accuracies = ', eval_train[3], eval_train[6])
+        if test_dict is not None:
+            eval_test = network.evaluate(input_test,output_test,verbose=2)
+            eval_test_dict = {out: eval_test[i] for i, out in enumerate(network.metrics_names)}
+            print('After training: l2, h1 testing accuracies = ', eval_test[3], eval_test[6])
 
     return network
 
-def train_l2_network(network,train_dict,test_dict,opt_parameters = network_training_parameters(),verbose = True):
+def train_l2_network(network,train_dict,test_dict = None,opt_parameters = network_training_parameters(),verbose = True):
     if opt_parameters['keras_opt'] == 'adam':
         optimizer = tf.keras.optimizers.Adam(learning_rate = opt_parameters['keras_alpha'])
     elif opt_parameters['keras_opt'] == 'sgd':
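Since test_dict now defaults to None, train_h1_network can run without held-out data: the testing evaluation is skipped and Keras receives validation_data = None. A usage sketch under the train_full_jacobian branch above; the arrays m_train, q_train, and J_train are placeholders for data the dino loaders would provide.

opt_parameters = network_training_parameters()
opt_parameters['train_full_jacobian'] = True

train_dict = {'m_data': m_train, 'q_data': q_train, 'J_data': J_train}

# No test_dict: the fit call proceeds with validation_data = None.
network = train_h1_network(network, train_dict, opt_parameters=opt_parameters)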
@@ -211,23 +198,29 @@ def train_l2_network(network,train_dict,test_dict,opt_parameters = network_train
 
     input_train = train_dict['m_data']
     output_train = train_dict['q_data']
-    input_test = test_dict['m_data']
-    output_test = test_dict['q_data']
+    if test_dict is None:
+        test_data = None
+    else:
+        input_test = test_dict['m_data']
+        output_test = test_dict['q_data']
+        test_data = (input_test,output_test)
 
     if verbose:
         l2_loss_train, l2_acc_train = network.evaluate(input_train,output_train,verbose=2)
         print('Before training: l2 accuracy = ', l2_acc_train)
-        l2_loss_test, l2_acc_test = network.evaluate(input_test,output_test,verbose=2)
-        print('Before training: l2accuracy = ', l2_acc_test)
+        if test_dict is not None:
+            l2_loss_test, l2_acc_test = network.evaluate(input_test,output_test,verbose=2)
+            print('Before training: l2 accuracy = ', l2_acc_test)
 
     network.fit(input_train,output_train,
-                    validation_data = (input_test,output_test),epochs = opt_parameters['keras_epochs'],\
+                    validation_data = test_data,epochs = opt_parameters['keras_epochs'],\
                     batch_size = opt_parameters['keras_batch_size'],verbose = opt_parameters['keras_verbose'])
 
     if verbose:
         l2_loss_train, l2_acc_train = network.evaluate(input_train,output_train,verbose=2)
         print('After training: l2 accuracy = ', l2_acc_train)
-        l2_loss_test, l2_acc_test = network.evaluate(input_test,output_test,verbose=2)
-        print('After training: l2accuracy = ', l2_acc_test)
+        if test_dict is not None:
+            l2_loss_test, l2_acc_test = network.evaluate(input_test,output_test,verbose=2)
+            print('After training: l2 accuracy = ', l2_acc_test)
 
     return network
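train_l2_network follows the same pattern: omit test_dict to train without validation, or pass one to restore the previous behavior. For example, again with placeholder arrays standing in for loader output:

# l2-only training without a test set.
network = train_l2_network(network, {'m_data': m_train, 'q_data': q_train})

# Or with held-out data, as before this patch.
network = train_l2_network(network, {'m_data': m_train, 'q_data': q_train},
                           test_dict={'m_data': m_test, 'q_data': q_test})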