diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index e9e5879..1d54272 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -9,20 +9,24 @@ on:
   workflow_dispatch:
 
 jobs:
-  build:
+  lint:
+    name: Lint
     runs-on: ubuntu-latest
     steps:
-      -
-        uses: actions/checkout@v3
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.9'
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements.txt
-      - name: Test with pytest
-        run: |
-          pip install pytest
-          pytest ncopt/tests/
\ No newline at end of file
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+      - uses: pre-commit/action@v3.0.1
+
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+      - run: pipx install hatch
+      - run: hatch env create
+      - run: hatch run test
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..a0eb12b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,9 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.3.0
+    hooks:
+      # Run the linter.
+      - id: ruff
+      # Run the formatter.
+      - id: ruff-format
\ No newline at end of file
diff --git a/README.md b/README.md
index 418fbdb..bc330e9 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,14 @@ This repository contains a `Python` implementation of the SQP-GS (*Sequential Qu
 
 **Note:** this implementation is a **prototype code**, it has been tested only for a simple problem and it is not performance-optimized. A Matlab implementation is available from the authors of the paper, see [2].
 
+## Installation
+
+If you want to install an editable version of this package in your Python environment, run the command
+
+```
+   python -m pip install --editable .
+```
+
 ## Mathematical description
 
 The algorithm can solve problems of the form
diff --git a/example_rosenbrock.py b/example_rosenbrock.py
index 96e21ec..c8e8c4e 100755
--- a/example_rosenbrock.py
+++ b/example_rosenbrock.py
@@ -1,13 +1,15 @@
 """
 @author: Fabian Schaipp
 """
-import numpy as np
+
 import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.lines import Line2D
+
+from ncopt.funs import f_rosenbrock, g_max
 from ncopt.sqpgs import SQPGS
-from ncopt.funs import f_rosenbrock, g_max, g_linear
 
-#%% Setup
+# %% Setup
 
 f = f_rosenbrock()
 g = g_max()
@@ -20,65 +22,61 @@
 # equality constraints (list of functions)
 gE = []
 
-xstar = np.array([1/np.sqrt(2), 0.5]) # solution
+xstar = np.array([1 / np.sqrt(2), 0.5])  # solution
 
-#%% How to use the solver
+# %% How to use the solver
 
 problem = SQPGS(f, gI, gE, x0=None, tol=1e-6, max_iter=100, verbose=True)
 x = problem.solve()
-print(f"Distance to solution:", f'{np.linalg.norm(x-xstar):.9f}')
+print("Distance to solution:", f"{np.linalg.norm(x-xstar):.9f}")
 
-#%% Solve from multiple starting points and plot
+# %% Solve from multiple starting points and plot
 
-_x, _y = np.linspace(-2,2,100), np.linspace(-2,2,100)
+_x, _y = np.linspace(-2, 2, 100), np.linspace(-2, 2, 100)
 X, Y = np.meshgrid(_x, _y)
 Z = np.zeros_like(X)
 
 for j in np.arange(X.shape[0]):
     for i in np.arange(X.shape[1]):
-        Z[i,j] = f.eval(np.array([X[i,j], Y[i,j]]))
+        Z[i, j] = f.eval(np.array([X[i, j], Y[i, j]]))
 
-#%%
-from matplotlib.lines import Line2D
+# %%
 
-fig, ax = plt.subplots(figsize=(5,4))
+fig, ax = plt.subplots(figsize=(5, 4))
 
 np.random.seed(123)
 
 # Plot contour and solution
-ax.contourf(X, Y, Z, cmap='gist_heat', levels=20, alpha=0.7,
-            antialiased=True, lw=0, zorder=0)
-# ax.contourf(X, Y, Z, colors='lightgrey', levels=20, alpha=0.7,
+ax.contourf(X, Y, Z, cmap="gist_heat", levels=20, alpha=0.7, antialiased=True, lw=0, zorder=0)
+# ax.contourf(X, Y, Z, colors='lightgrey', levels=20, alpha=0.7,
 #             antialiased=True, lw=0, zorder=0)
-# ax.contour(X, Y, Z, cmap='gist_heat', levels=8, alpha=0.7,
+# ax.contour(X, Y, Z, cmap='gist_heat', levels=8, alpha=0.7,
 #            antialiased=True, linewidths=4, zorder=0)
-ax.scatter(xstar[0], xstar[1], marker="*", s=200, c="gold", zorder=1)
+ax.scatter(xstar[0], xstar[1], marker="*", s=200, c="gold", zorder=1)
 
 for i in range(20):
     x0 = np.random.randn(2)
-    problem = SQPGS(f, gI, gE, x0, tol=1e-6, max_iter=100, verbose=False,
-                    store_history=True)
+    problem = SQPGS(f, gI, gE, x0, tol=1e-6, max_iter=100, verbose=False, store_history=True)
     x_k = problem.solve()
     print(x_k)
 
     x_hist = problem.x_hist
-    ax.plot(x_hist[:,0], x_hist[:,1], c="silver", lw=1, ls='--', alpha=0.5, zorder=2)
+    ax.plot(x_hist[:, 0], x_hist[:, 1], c="silver", lw=1, ls="--", alpha=0.5, zorder=2)
     ax.scatter(x_k[0], x_k[1], marker="+", s=50, c="k", zorder=3)
     ax.scatter(x0[0], x0[1], marker="o", s=30, c="silver", zorder=3)
-
-ax.set_xlim(-2,2)
-ax.set_ylim(-2,2)
+
+ax.set_xlim(-2, 2)
+ax.set_ylim(-2, 2)
 
 fig.suptitle("Trajectory for multiple starting points")
 
-legend_elements = [Line2D([0], [0], marker='*', lw=0, color='gold', label='Solution',
-                          markersize=15),
-                   Line2D([0], [0], marker='o', lw=0, color='silver', label='Starting point',
-                          markersize=8),
-                   Line2D([0], [0], marker='+', lw=0, color='k', label='Final iterate',
-                          markersize=8),]
+legend_elements = [
+    Line2D([0], [0], marker="*", lw=0, color="gold", label="Solution", markersize=15),
+    Line2D([0], [0], marker="o", lw=0, color="silver", label="Starting point", markersize=8),
+    Line2D([0], [0], marker="+", lw=0, color="k", label="Final iterate", markersize=8),
+]
 
 ax.legend(handles=legend_elements, ncol=3, fontsize=8)
 fig.tight_layout()
-fig.savefig('data/img/rosenbrock.png')
+fig.savefig("data/img/rosenbrock.png")
diff --git a/ncopt/sqpgs/__init__.py b/ncopt/sqpgs/__init__.py
deleted file mode 100644
index 194bafb..0000000
--- a/ncopt/sqpgs/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .main import SQPGS
-
diff --git a/ncopt/sqpgs/defaults.py b/ncopt/sqpgs/defaults.py
deleted file mode 100644
index 43a414d..0000000
--- a/ncopt/sqpgs/defaults.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import copy
-
-class Dotdict(dict):
-    """dot.notation access to dictionary attributes"""
-    __getattr__ = dict.get
-    __setattr__ = dict.__setitem__
-    __delattr__ = dict.__delitem__
-
-
-_defaults = {'tol': 1e-8,
-             'max_iter': 100,
-             'verbose': True,
-             'assert_tol': 1e-5,
-             'store_history': False,
-             'log_every': 10
-             }
-
-_option_defaults = {'eps': 1e-1,
-                    'rho' : 1e-1,
-                    'theta' : 1e-1,
-                    'eta' : 1e-8,
-                    'gamma' : 0.5,
-                    'beta_eps' : 0.5,
-                    'beta_rho' : 0.5,
-                    'beta_theta' : 0.8,
-                    'nu' : 10,
-                    'xi_s' : 1e3,
-                    'xi_y' : 1e3,
-                    'xi_sy' : 1e-6,
-                    'iter_H': 10,
-                    'num_points_obj' : 2,
-                    'num_points_gI' : 3,
-                    'num_points_gE' : 4,
-                    }
-
-DEFAULT_ARG = Dotdict(_defaults)
-DEFAULT_OPTION = Dotdict(_option_defaults)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..7644d97
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,104 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "ncopt"
+dynamic = ["version"]
+description = 'This repository contains a Python implementation of the SQP-GS (Sequential Quadratic Programming - Gradient Sampling) algorithm by Curtis and Overton.'
+readme = "README.md"
+requires-python = ">=3.9"
+license = "BSD-3-Clause"
+keywords = []
+authors = [
+  { name = "fabian-sp", email = "fabian.schaipp@gmail.com" },
+]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3.9",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: Implementation :: CPython",
+  "Programming Language :: Python :: Implementation :: PyPy",
+]
+dependencies = [
+  "numpy",
+  "torch",
+  "cvxopt",
+]
+
+[project.urls]
+Documentation = "https://github.com/fabian-sp/ncOPT#readme"
+Issues = "https://github.com/fabian-sp/ncOPT/issues"
+Source = "https://github.com/fabian-sp/ncOPT"
+
+[tool.hatch.version]
+path = "src/ncopt/__about__.py"
+
+[tool.hatch.envs.default]
+dependencies = [
+  "coverage[toml]>=6.5",
+  "pytest",
+  "matplotlib",
+]
+
+[tool.hatch.envs.default.scripts]
+test = "pytest {args:tests}"
+test-cov = "coverage run -m pytest {args:tests}"
+cov-report = [
+  "- coverage combine",
+  "coverage report",
+]
+cov = [
+  "test-cov",
+  "cov-report",
+]
+
+[[tool.hatch.envs.all.matrix]]
+python = ["3.9", "3.10", "3.11", "3.12"]
+
+[tool.hatch.envs.types]
+dependencies = [
+  "mypy>=1.0.0",
+]
+[tool.hatch.envs.types.scripts]
+check = "mypy --install-types --non-interactive {args:src/ncopt tests}"
+
+[tool.coverage.run]
+source_pkgs = ["ncopt", "tests"]
+branch = true
+parallel = true
+omit = [
+  "src/ncopt/__about__.py",
+]
+
+[tool.coverage.paths]
+ncopt = ["src/ncopt", "*/ncopt/src/ncopt"]
+tests = ["tests", "*/ncopt/tests"]
+
+[tool.coverage.report]
+exclude_lines = [
+  "no cov",
+  "if __name__ == .__main__.:",
+  "if TYPE_CHECKING:",
+]
+
+[tool.ruff]
+lint.select = [
+  "E",
+  "F",
+  "I",
+  "NPY201",
+  "W605",  # Check for invalid escape sequences in docstrings (errors in py >= 3.11)
+]
+lint.ignore = [
+  "E741",  # ambiguous variable name
+]
+line-length = 100
+
+# The minimum Python version that should be supported
+target-version = "py39"
+
+src = ["src"]
diff --git a/scripts/timing_rosenbrock.py b/scripts/timing_rosenbrock.py
index 8855b36..5c0d19e 100644
--- a/scripts/timing_rosenbrock.py
+++ b/scripts/timing_rosenbrock.py
@@ -1,40 +1,42 @@
 """
 author: Fabian Schaipp
 """
-import numpy as np
-import sys
-
-sys.path.append('..')
+import timeit
+
+import numpy as np
 
+from ncopt.funs import f_rosenbrock, g_linear, g_max
 from ncopt.sqpgs import SQPGS
-from ncopt.funs import f_rosenbrock, g_max, g_linear
 
-#%%
+# %%
 f = f_rosenbrock()
 g = g_max()
 
 # inequality constraints (list of functions)
 gI = [g]
 
-xstar = np.array([1/np.sqrt(2), 0.5])
+xstar = np.array([1 / np.sqrt(2), 0.5])
 
 np.random.seed(31)
 x0 = np.random.randn(2)
 
-#%% Timing one scalar inequality constraint
+# %% Timing one scalar inequality constraint
 
 # equality constraints (list of scalar functions)
 gE = []
 
 problem = SQPGS(f, gI, gE, x0, tol=1e-20, max_iter=100, verbose=False)
-%timeit -n 20 x_k = problem.solve()
+timeit.timeit("x_k = problem.solve()", number=20, globals=globals())
 
-# result (start of refactoring):
+# result (start of refactoring):
 # 168 ms ± 2.96 ms per loop (mean ± std. dev. of 7 runs, 20 loops each)
 
-#%% Timing equality constraint
+# %% Timing equality constraint
 
-A = np.eye(2); b = np.zeros(2); g1 = g_linear(A, b)
+A = np.eye(2)
+b = np.zeros(2)
+g1 = g_linear(A, b)
 
 # equality constraints (list of scalar functions)
 gE = [g1]
@@ -43,7 +45,7 @@
 np.random.seed(31)
 x0 = np.random.randn(2)
 
-%timeit -n 10 x_k = problem.solve()
+timeit.timeit("x_k = problem.solve()", number=10, globals=globals())
 
-# result (start of refactoring):
-# 291 ms ± 3.53 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
\ No newline at end of file
+# result (start of refactoring):
+# 291 ms ± 3.53 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
diff --git a/scripts/train_max_fun.py b/scripts/train_max_fun.py
index d73f11c..16acabe 100755
--- a/scripts/train_max_fun.py
+++ b/scripts/train_max_fun.py
@@ -6,63 +6,69 @@
 Serves as example how to use neural networks as constraint functions for SQP-GS.
 """
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 import torch
 from torch.optim.lr_scheduler import StepLR
 
 c1 = np.sqrt(2)
-c2 = 2.
+c2 = 2.0
+
 
 @np.vectorize
-def g(x0,x1):
-    return np.maximum(c1*x0, c2*x1) - 1
+def g(x0, x1):
+    return np.maximum(c1 * x0, c2 * x1) - 1
+
 
 def generate_data(grid_points):
-    x0 = 2*np.random.randn(grid_points)
-    x1 = 2*np.random.randn(grid_points)
+    x0 = 2 * np.random.randn(grid_points)
+    x1 = 2 * np.random.randn(grid_points)
     x0.sort()
     x1.sort()
-    X0,X1 = np.meshgrid(x0,x1)
-    return X0,X1
+    X0, X1 = np.meshgrid(x0, x1)
+    return X0, X1
+
 
 grid_points = 500
 X0, X1 = generate_data(grid_points)
-Z = g(X0,X1)
+Z = g(X0, X1)
 
-#%% Preparations
+# %% Preparations
 
-tmp = np.stack((X0.reshape(-1),X1.reshape(-1))).T
+tmp = np.stack((X0.reshape(-1), X1.reshape(-1))).T
 
 # pytorch weights are in torch.float32, numpy data is float64
 tX = torch.tensor(tmp, dtype=torch.float32)
 tZ = torch.tensor(Z.reshape(-1), dtype=torch.float32)
 
-num_samples = len(tX) # number of training points
+num_samples = len(tX)  # number of training points
+
+# %%
 
-#%%
+
 class Max2D(torch.nn.Module):
     def __init__(self):
         super().__init__()
-        self.l1 = torch.nn.Linear(2, 2)
+        self.l1 = torch.nn.Linear(2, 2)
+
     def forward(self, x):
         x = self.l1(x)
-        x,_ = torch.max(x, dim=-1)
+        x, _ = torch.max(x, dim=-1)
         return x
 
-loss_fn = torch.nn.MSELoss(reduction='mean')
+
+loss_fn = torch.nn.MSELoss(reduction="mean")
 
 model = Max2D()
 print(model.l1.weight)
 print(model.l1.bias)
 
 # testing
-x = torch.tensor([1., 4.])
+x = torch.tensor([1.0, 4.0])
 print("True value: ", g(x[0], x[1]), ". Predicted value: ", model(x).item())
 
-#%% Training
+# %% Training
 
 lr = 1e-3
 num_epochs = 10
@@ -71,44 +77,48 @@ def forward(self, x):
 optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
 scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
 
-def sample_batch(num_samples, b):
+
+def sample_batch(num_samples, b):
     S = torch.randint(high=num_samples, size=(b,))
     return S
 
+
 for epoch in range(num_epochs):
-    
-    epoch_loss = 0
-    for t in range(num_samples//batch_size):
-        
+    epoch_loss = 0
+    for t in range(num_samples // batch_size):
         S = sample_batch(num_samples, batch_size)
         x_batch = tX[S]
         z_batch = tZ[S]
 
         optimizer.zero_grad()
-        
+
         loss = loss_fn(model.forward(x_batch), z_batch)
         loss.backward()
         optimizer.step()
         epoch_loss += loss.item()
 
     print(f"Epoch {epoch+1}/{num_epochs}: loss={np.mean(epoch_loss)}")
-    scheduler.step()
+    scheduler.step()
 
 print("Learned parameters:")
 print(model.l1.weight)
 print(model.l1.bias)
 
-#%% Save checkpoint
+# %% Save checkpoint
+
+path = "../data/checkpoints/max2d.pt"
+torch.save(
+    {
+        "epoch": epoch,
+        "model_state_dict": model.state_dict(),
+        "optimizer_state_dict": optimizer.state_dict(),
+    },
+    path,
+)
 
-path = '../data/checkpoints/max2d.pt'
-torch.save({'epoch': epoch,
-            'model_state_dict': model.state_dict(),
-            'optimizer_state_dict': optimizer.state_dict(),
-            }, path)
-
-#%% Plot results
+# %% Plot results
 
 N_test = 200
 X0_test, X1_test = generate_data(N_test)
@@ -120,20 +130,20 @@ def sample_batch(num_samples, b):
 Z_test = model.forward(X_test).detach().numpy().squeeze()
 Z_test_arr = Z_test.reshape(N_test, N_test)
 
-Z_true = g(X0_test,X1_test)
+Z_true = g(X0_test, X1_test)
 
-print("Test mean squared error: ", np.mean((Z_test-Z_true.reshape(-1))**2))
+print("Test mean squared error: ", np.mean((Z_test - Z_true.reshape(-1)) ** 2))
 
-fig, axs = plt.subplots(1,2, figsize=(8,4))
+fig, axs = plt.subplots(1, 2, figsize=(8, 4))
 
 ax = axs[0]
-ax.contourf(X0_test, X1_test, Z_test_arr, cmap='magma', vmin=-10,vmax=10,levels=50)
-ax.set_xlim(-5,5)
-ax.set_ylim(-5,5)
+ax.contourf(X0_test, X1_test, Z_test_arr, cmap="magma", vmin=-10, vmax=10, levels=50)
+ax.set_xlim(-5, 5)
+ax.set_ylim(-5, 5)
 ax.set_title("Learned contours")
 
 ax = axs[1]
-ax.contourf(X0_test, X1_test, Z_true, cmap='magma', vmin=-10,vmax=10,levels=50)
-ax.set_xlim(-5,5)
-ax.set_ylim(-5,5)
+ax.contourf(X0_test, X1_test, Z_true, cmap="magma", vmin=-10, vmax=10, levels=50)
+ax.set_xlim(-5, 5)
+ax.set_ylim(-5, 5)
 ax.set_title("True contours")
diff --git a/src/ncopt/__about__.py b/src/ncopt/__about__.py
new file mode 100644
index 0000000..f102a9c
--- /dev/null
+++ b/src/ncopt/__about__.py
@@ -0,0 +1 @@
+__version__ = "0.0.1"
diff --git a/ncopt/__init__.py b/src/ncopt/__init__.py
similarity index 50%
rename from ncopt/__init__.py
rename to src/ncopt/__init__.py
index 139597f..8b13789 100644
--- a/ncopt/__init__.py
+++ b/src/ncopt/__init__.py
@@ -1,2 +1 @@
-
 
diff --git a/ncopt/funs.py b/src/ncopt/funs.py
similarity index 56%
rename from ncopt/funs.py
rename to src/ncopt/funs.py
index 7aea406..206f939 100755
--- a/ncopt/funs.py
+++ b/src/ncopt/funs.py
@@ -4,60 +4,64 @@
 
 import numpy as np
 
+
 class f_rosenbrock:
     """
-    Nonsmooth Rosenbrock function (see 5.1 in Curtis, Overton "SQP FOR NONSMOOTH CONSTRAINED OPTIMIZATION")
-    
+    Nonsmooth Rosenbrock function (see 5.1 in Curtis, Overton
+    "SQP FOR NONSMOOTH CONSTRAINED OPTIMIZATION")
+
     x -> w|x_1^2 − x_2| + (1 − x_1)^2
     """
-    def __init__(self, w = 8):
-        self.name = 'rosenbrock'
+
+    def __init__(self, w=8):
+        self.name = "rosenbrock"
         self.dim = 2
         self.dimOut = 1
         self.w = w
-        
-    def eval(self, x):
-        return self.w*np.abs(x[0]**2-x[1]) + (1-x[0])**2
-    
+
+    def eval(self, x):
+        return self.w * np.abs(x[0] ** 2 - x[1]) + (1 - x[0]) ** 2
+
     def differentiable(self, x):
-        return np.abs(x[0]**2 - x[1]) > 1e-10
-    
+        return np.abs(x[0] ** 2 - x[1]) > 1e-10
+
     def grad(self, x):
-        a = np.array([-2+x[0], 0])
-        sign = np.sign(x[0]**2 -x[1])
-        
+        a = np.array([-2 + x[0], 0])
+        sign = np.sign(x[0] ** 2 - x[1])
+
         if sign == 1:
-            b = np.array([2*x[0], -1])
+            b = np.array([2 * x[0], -1])
         elif sign == -1:
-            b = np.array([-2*x[0], 1])
+            b = np.array([-2 * x[0], 1])
         else:
-            b = np.array([-2*x[0], 1])
-        
-        #b = np.sign(x[0]**2 -x[1]) * np.array([2*x[0], -1])
+            b = np.array([-2 * x[0], 1])
+
+        # b = np.sign(x[0]**2 -x[1]) * np.array([2*x[0], -1])
         return a + b
-    
+
+
 class g_max:
     """
     maximum function (see 5.1 in Curtis, Overton "SQP FOR NONSMOOTH CONSTRAINED OPTIMIZATION")
-    
+
     x -> max(c1*x_1, c2*x_2) - 1
     """
-    def __init__(self, c1 = np.sqrt(2), c2 = 2.):
-        self.name = 'max'
+
+    def __init__(self, c1=np.sqrt(2), c2=2.0):
+        self.name = "max"
         self.c1 = c1
         self.c2 = c2
         self.dimOut = 1
         return
-    
+
     def eval(self, x):
-        return np.maximum(self.c1*x[0], self.c2*x[1]) - 1
-    
+        return np.maximum(self.c1 * x[0], self.c2 * x[1]) - 1
+
     def differentiable(self, x):
-        return np.abs(self.c1*x[0] -self.c2*x[1]) > 1e-10
-    
+        return np.abs(self.c1 * x[0] - self.c2 * x[1]) > 1e-10
+
     def grad(self, x):
-        
-        sign = np.sign(self.c1*x[0] - self.c2*x[1])
+        sign = np.sign(self.c1 * x[0] - self.c2 * x[1])
         if sign == 1:
             g = np.array([self.c1, 0])
         elif sign == -1:
@@ -66,27 +70,27 @@ def grad(self, x):
             g = np.array([0, self.c2])
         return g
 
+
 class g_linear:
     """
     linear constraint:
-    
+
     x -> Ax - b
     """
+
     def __init__(self, A, b):
-        self.name = 'linear'
+        self.name = "linear"
         self.A = A
         self.b = b
         self.dim = A.shape[0]
         self.dimOut = A.shape[1]
         return
-    
+
     def eval(self, x):
         return self.A @ x - self.b
-    
+
     def differentiable(self, x):
         return True
-    
+
     def grad(self, x):
         return self.A
-    
-
diff --git a/src/ncopt/sqpgs/__init__.py b/src/ncopt/sqpgs/__init__.py
new file mode 100644
index 0000000..dcab673
--- /dev/null
+++ b/src/ncopt/sqpgs/__init__.py
@@ -0,0 +1 @@
+from .main import SQPGS  # noqa
diff --git a/src/ncopt/sqpgs/defaults.py b/src/ncopt/sqpgs/defaults.py
new file mode 100644
index 0000000..783ac0a
--- /dev/null
+++ b/src/ncopt/sqpgs/defaults.py
@@ -0,0 +1,38 @@
+class Dotdict(dict):
+    """dot.notation access to dictionary attributes"""
+
+    __getattr__ = dict.get
+    __setattr__ = dict.__setitem__
+    __delattr__ = dict.__delitem__
+
+
+_defaults = {
+    "tol": 1e-8,
+    "max_iter": 100,
+    "verbose": True,
+    "assert_tol": 1e-5,
+    "store_history": False,
+    "log_every": 10,
+}
+
+_option_defaults = {
+    "eps": 1e-1,
+    "rho": 1e-1,
+    "theta": 1e-1,
+    "eta": 1e-8,
+    "gamma": 0.5,
+    "beta_eps": 0.5,
+    "beta_rho": 0.5,
+    "beta_theta": 0.8,
+    "nu": 10,
+    "xi_s": 1e3,
+    "xi_y": 1e3,
+    "xi_sy": 1e-6,
+    "iter_H": 10,
+    "num_points_obj": 2,
+    "num_points_gI": 3,
+    "num_points_gE": 4,
+}
+
+DEFAULT_ARG = Dotdict(_defaults)
+DEFAULT_OPTION = Dotdict(_option_defaults)
diff --git a/ncopt/sqpgs/main.py b/src/ncopt/sqpgs/main.py
similarity index 52%
rename from ncopt/sqpgs/main.py
rename to src/ncopt/sqpgs/main.py
index 7bb0bf0..a010779 100644
--- a/ncopt/sqpgs/main.py
+++ b/src/ncopt/sqpgs/main.py
@@ -1,46 +1,54 @@
 """
 @author: Fabian Schaipp
 
-Implements the SQP-GS algorithm from
+Implements the SQP-GS algorithm from
 
-    Frank E. Curtis and Michael L. Overton, A sequential quadratic programming algorithm for nonconvex, nonsmooth constrained optimization,
+    Frank E. Curtis and Michael L. Overton, A sequential quadratic programming
+    algorithm for nonconvex, nonsmooth constrained optimization,
     SIAM Journal on Optimization 2012 22:2, 474-500, https://doi.org/10.1137/090780201.
 
 The notation of the code tries to follow closely the notation of the paper.
 """
 
-import numpy as np
-import cvxopt as cx
-
 import copy
 import time
 from typing import Optional
 
-from .defaults import DEFAULT_ARG, DEFAULT_OPTION
-from ..utils import get_logger
+import cvxopt as cx
+import numpy as np
+
+from ncopt.sqpgs.defaults import DEFAULT_ARG, DEFAULT_OPTION
+from ncopt.utils import get_logger
+
 
 class SQPGS:
-    def __init__(self,
-                 f,
-                 gI,
-                 gE,
-                 x0: Optional[np.array]=None,
-                 tol: float=DEFAULT_ARG.tol,
-                 max_iter: int=DEFAULT_ARG.max_iter,
-                 verbose: bool=DEFAULT_ARG.verbose,
-                 options: dict={},
-                 assert_tol: float=DEFAULT_ARG.assert_tol,
-                 store_history: bool=DEFAULT_ARG.store_history,
-                 log_every: int=DEFAULT_ARG.log_every
-                 ) -> None:
-        
+    def __init__(
+        self,
+        f,
+        gI,
+        gE,
+        x0: Optional[np.array] = None,
+        tol: float = DEFAULT_ARG.tol,
+        max_iter: int = DEFAULT_ARG.max_iter,
+        verbose: bool = DEFAULT_ARG.verbose,
+        options: dict = {},
+        assert_tol: float = DEFAULT_ARG.assert_tol,
+        store_history: bool = DEFAULT_ARG.store_history,
+        log_every: int = DEFAULT_ARG.log_every,
+    ) -> None:
         if tol < 0:
             raise ValueError(f"Tolerance must be non-negative, but was specified as {tol}.")
         if max_iter < 0:
-            raise ValueError(f"Maximum number of iterations must be non-negative, but was specified as {max_iter}.")
+            raise ValueError(
+                f"Maximum number of iterations must be non-negative, \
+                but was specified as {max_iter}."
+            )
         if assert_tol < 0:
-            raise ValueError(f"Assertion tolerance must be non-negative, but was specified as {assert_tol}.")
-        
+            raise ValueError(
+                f"Assertion tolerance must be non-negative, but \
+                was specified as {assert_tol}."
+            )
+
         self.f = f
         self.gI = gI
         self.gE = gE
@@ -59,213 +67,232 @@ def __init__(self,
 
         ###############################################################
         ########## Extract dimensions
-        
+
         # extract dimensions of constraints
         self.dim = self.f.dim
         self.dimI = np.array([g.dimOut for g in self.gI], dtype=int)
         self.dimE = np.array([g.dimOut for g in self.gE], dtype=int)
-        
-        
-        self.nI_ = len(self.gI) # number of inequality function objects
-        self.nE_ = len(self.gE) # number of equality function objects
-        
-        self.nI = sum(self.dimI) # number of inequality costraints
-        self.nE = sum(self.dimE) # number of equality costraints
-        
+
+        self.nI_ = len(self.gI)  # number of inequality function objects
+        self.nE_ = len(self.gE)  # number of equality function objects
+
+        self.nI = sum(self.dimI)  # number of inequality constraints
+        self.nE = sum(self.dimE)  # number of equality constraints
+
         ###############################################################
         ########## Initialize
-        
-        self.status = 'not optimal'
-        self.logger = get_logger(name='ncopt', verbose=self.verbose)
-        
+
+        self.status = "not optimal"
+        self.logger = get_logger(name="ncopt", verbose=self.verbose)
+
         # starting point
         if x0 is None:
             self.x_k = np.zeros(self.dim)
        else:
             self.x_k = x0.copy()
-        
+
         return
-    
+
     def solve(self):
         ###############################################################
-        ########## Set all hyperparameters
-        
-        eps = self.options['eps'] # sampling radius
-        rho = self.options['rho']
-        theta = self.options['theta']
-        
-        eta = self.options['eta']
-        gamma = self.options['gamma']
-        beta_eps = self.options['beta_eps']
-        beta_rho = self.options['beta_rho']
-        beta_theta = self.options['beta_theta']
-        nu = self.options['nu']
-        xi_s = self.options['xi_s']
-        xi_y = self.options['xi_y']
-        xi_sy = self.options['xi_sy']
-        iter_H = self.options['iter_H']
-        
-        p0 = self.options['num_points_obj'] # sample points for objective
-        pI_ = self.options['num_points_gI'] * np.ones(self.nI_, dtype=int) # sample points for ineq constraint
-        pE_ = self.options['num_points_gE'] * np.ones(self.nE_, dtype=int) # sample points for eq constraint
-        
+        ########## Set all hyperparameters
+
+        eps = self.options["eps"]  # sampling radius
+        rho = self.options["rho"]
+        theta = self.options["theta"]
+
+        eta = self.options["eta"]
+        gamma = self.options["gamma"]
+        beta_eps = self.options["beta_eps"]
+        beta_rho = self.options["beta_rho"]
+        beta_theta = self.options["beta_theta"]
+        nu = self.options["nu"]
+        xi_s = self.options["xi_s"]
+        xi_y = self.options["xi_y"]
+        xi_sy = self.options["xi_sy"]
+        iter_H = self.options["iter_H"]
+
+        p0 = self.options["num_points_obj"]  # sample points for objective
+        pI_ = self.options["num_points_gI"] * np.ones(
+            self.nI_, dtype=int
+        )  # sample points for ineq constraint
+        pE_ = self.options["num_points_gE"] * np.ones(
+            self.nE_, dtype=int
+        )  # sample points for eq constraint
+
         pI = np.repeat(pI_, self.dimI)
         pE = np.repeat(pE_, self.dimE)
         ###############################################################
 
         self.SP = SubproblemSQPGS(self.dim, p0, pI, pE, self.assert_tol)
-        
-        E_k = np.inf # for stopping criterion
-        x_kmin1 = None # last iterate
-        g_kmin1 = None #
+
+        E_k = np.inf  # for stopping criterion
+        x_kmin1 = None  # last iterate
+        g_kmin1 = None  #
 
         # Hessian matrix
         H = np.eye(self.dim)
         s_hist = np.zeros((self.dim, iter_H))
         y_hist = np.zeros((self.dim, iter_H))
-        
+
         do_step = False
 
         x_hist = [self.x_k] if self.store_history else None
-        self.timings = {'total': [], 'sp_update': [], 'sp_solve': []}
+        self.timings = {"total": [], "sp_update": [], "sp_solve": []}
 
         ##############################################
         # START OF LOOP
         ##############################################
         for iter_k in range(self.max_iter):
-            
             t0 = time.perf_counter()
             if E_k <= self.tol:
-                self.status = 'optimal'
+                self.status = "optimal"
                 break
-            
+
             ##############################################
             # SAMPLING
             ##############################################
             B_f = sample_points(self.x_k, eps, p0)
             B_f = np.vstack((self.x_k, B_f))
-            
+
             B_gI = list()
             for j in np.arange(self.nI_):
                 B_j = sample_points(self.x_k, eps, pI_[j])
                 B_j = np.vstack((self.x_k, B_j))
                 B_gI.append(B_j)
-            
+
             B_gE = list()
             for j in np.arange(self.nE_):
                 B_j = sample_points(self.x_k, eps, pE_[j])
                 B_j = np.vstack((self.x_k, B_j))
                 B_gE.append(B_j)
-            
-            
+
             ####################################
             # COMPUTE GRADIENTS AND EVALUATE
             ###################################
-            D_f = compute_gradients(self.f, B_f)[0] # returns list, always has one element
-            
+            D_f = compute_gradients(self.f, B_f)[0]  # returns list, always has one element
+
             D_gI = list()
             for j in np.arange(self.nI_):
                 D_gI += compute_gradients(self.gI[j], B_gI[j])
-            
+
             D_gE = list()
             for j in np.arange(self.nE_):
                 D_gE += compute_gradients(self.gE[j], B_gE[j])
-            
+
             f_k = self.f.eval(self.x_k)
             # hstack cannot handle empty lists!
             if self.nI_ > 0:
                 gI_k = np.hstack([self.gI[j].eval(self.x_k) for j in range(self.nI_)])
             else:
                 gI_k = np.array([])
-            
+
             if self.nE_ > 0:
                 gE_k = np.hstack([self.gE[j].eval(self.x_k) for j in range(self.nE_)])
             else:
                 gE_k = np.array([])
-            
+
             ##############################################
             # SUBPROBLEM
             ##############################################
-            
+
             t01 = time.perf_counter()
             self.SP.update(H, rho, D_f, D_gI, D_gE, f_k, gI_k, gE_k)
             t11 = time.perf_counter()
             self.SP.solve()
             t21 = time.perf_counter()
-            self.timings['sp_update'].append(t11-t01)
-            self.timings['sp_solve'].append(t21-t11)
+            self.timings["sp_update"].append(t11 - t01)
+            self.timings["sp_solve"].append(t21 - t11)
 
             d_k = self.SP.d.copy()
-            # compute g_k from paper
-            g_k = self.SP.lambda_f @ D_f \
-                  + np.sum([self.SP.lambda_gI[j] @ D_gI[j] for j in range(self.nI)], axis = 0) \
-                  + np.sum([self.SP.lambda_gE[j] @ D_gE[j] for j in range(self.nE)], axis = 0)
-            
+            # compute g_k from paper
+            g_k = (
+                self.SP.lambda_f @ D_f
+                + np.sum([self.SP.lambda_gI[j] @ D_gI[j] for j in range(self.nI)], axis=0)
+                + np.sum([self.SP.lambda_gE[j] @ D_gE[j] for j in range(self.nE)], axis=0)
+            )
+
             # evaluate v(x) at x=x_k
             v_k = np.maximum(gI_k, 0).sum() + np.sum(np.abs(gE_k))
-            phi_k = rho*f_k + v_k
-            delta_q = phi_k - q_rho(d_k, rho, H, f_k, gI_k, gE_k, D_f, D_gI, D_gE)
-            
-            assert delta_q >= -self.assert_tol, f"Value is supposed to be non-negative, but is {delta_q}."
-            assert np.abs(self.SP.lambda_f.sum() - rho) <= self.assert_tol, f"Value is supposed to be negative, but is {np.abs(self.SP.lambda_f.sum() - rho)}."
-            
+            phi_k = rho * f_k + v_k
+            delta_q = phi_k - q_rho(d_k, rho, H, f_k, gI_k, gE_k, D_f, D_gI, D_gE)
+
+            assert (
+                delta_q >= -self.assert_tol
+            ), f"Value is supposed to be non-negative, but is {delta_q}."
+            assert (
+                np.abs(self.SP.lambda_f.sum() - rho) <= self.assert_tol
+            ), f"Value is supposed to be close to zero, but is {np.abs(self.SP.lambda_f.sum() - rho)}."
+
             # Logging, start after first iteration
             if iter_k % self.log_every == 1:
-                violI_k = np.maximum(gI_k, 0)
+                violI_k = np.maximum(gI_k, 0)
                 violE_k = np.abs(gE_k)
                 viol_k = np.max(np.hstack((violI_k, violE_k)))
 
-                self.logger.info(f"Iter {iter_k}, objective {f_k:.3E}, constraint violation {viol_k:.3E}, accuracy {E_k:.3E}, avg runtime/iter {(1e3)*np.mean(self.timings['total']):.3E} ms.")
-
-            new_E_k = stop_criterion(self.gI, self.gE, g_k, self.SP, gI_k, gE_k, B_gI, B_gE, self.nI_, self.nE_, pI, pE)
+                self.logger.info(
+                    f"Iter {iter_k}, objective {f_k:.3E}, constraint violation {viol_k:.3E}, \
+                    accuracy {E_k:.3E}, \
+                    avg runtime/iter {(1e3)*np.mean(self.timings['total']):.3E} ms."
+                )
+
+            new_E_k = stop_criterion(
+                self.gI, self.gE, g_k, self.SP, gI_k, gE_k, B_gI, B_gE, self.nI_, self.nE_, pI, pE
+            )
             E_k = min(E_k, new_E_k)
-            
+
             ##############################################
             # STEP
             ##############################################
-            
-            do_step = delta_q > nu*eps**2 # Flag whether step is taken or not
+
+            do_step = delta_q > nu * eps**2  # Flag whether step is taken or not
             if do_step:
-                alpha = 1.
-                phi_new = phi_rho(self.x_k + alpha*d_k, self.f, self.gI, self.gE, rho)
-                
+                alpha = 1.0
+                phi_new = phi_rho(self.x_k + alpha * d_k, self.f, self.gI, self.gE, rho)
+
                 # Armijo step size rule
-                while phi_new > phi_k - eta*alpha*delta_q:
+                while phi_new > phi_k - eta * alpha * delta_q:
                     alpha *= gamma
-                    phi_new = phi_rho(self.x_k + alpha*d_k, self.f, self.gI, self.gE, rho)
-                
+                    phi_new = phi_rho(self.x_k + alpha * d_k, self.f, self.gI, self.gE, rho)
+
                 # update Hessian
                 if x_kmin1 is not None:
                     s_k = self.x_k - x_kmin1
                     s_hist = np.roll(s_hist, 1, axis=1)
-                    s_hist[:,0] = s_k
-                    
+                    s_hist[:, 0] = s_k
+
                     y_k = g_k - g_kmin1
                     y_hist = np.roll(y_hist, 1, axis=1)
-                    y_hist[:,0] = y_k
-                    
+                    y_hist[:, 0] = y_k
+
                     hH = np.eye(self.dim)
                     for l in np.arange(iter_H):
-                        sl = s_hist[:,l]
-                        yl = y_hist[:,l]
-                        
-                        cond1 = (np.linalg.norm(sl) <= xi_s*eps) and (np.linalg.norm(yl) <= xi_y*eps)
-                        cond2 = (np.inner(sl,yl) >= xi_sy*eps**2)
-                        cond = cond1 and cond2
-                        
+                        sl = s_hist[:, l]
+                        yl = y_hist[:, l]
+
+                        cond1 = (np.linalg.norm(sl) <= xi_s * eps) and (
+                            np.linalg.norm(yl) <= xi_y * eps
+                        )
+                        cond2 = np.inner(sl, yl) >= xi_sy * eps**2
+                        cond = cond1 and cond2
+
                         if cond:
                             Hs = hH @ sl
-                            hH = hH - np.outer(Hs,Hs)/(sl @ Hs + 1e-16) + np.outer(yl,yl)/(yl @ sl + 1e-16)
+                            hH = (
+                                hH
+                                - np.outer(Hs, Hs) / (sl @ Hs + 1e-16)
+                                + np.outer(yl, yl) / (yl @ sl + 1e-16)
+                            )
 
                     H = hH.copy()
-                    
+
                 ####################################
                 # ACTUAL STEP
                 ###################################
                 x_kmin1 = self.x_k.copy()
                 g_kmin1 = g_k.copy()
-                
-                self.x_k = self.x_k + alpha*d_k
-                
+
+                self.x_k = self.x_k + alpha * d_k
+
             ##############################################
             # NO STEP
             ##############################################
@@ -274,29 +301,29 @@ def solve(self):
                     theta *= beta_theta
                 else:
                     rho *= beta_rho
-                
+
                 eps *= beta_eps
-            
-            
+
             if self.store_history:
                 x_hist.append(self.x_k)
 
             t1 = time.perf_counter()
-            self.timings['total'].append(t1-t0)
-            
+            self.timings["total"].append(t1 - t0)
+
         ##############################################
         # END OF LOOP
         ##############################################
         self.x_hist = np.vstack(x_hist) if self.store_history else None
-        
+
         if E_k > self.tol:
-            self.status = 'max iterations reached'
+            self.status = "max iterations reached"
 
         _final_msg = f"SQP-GS has terminated with status: {self.status}, final accuracy {E_k}."
         self.logger.info(_final_msg)
-        print(_final_msg) # we always want to display this.
+        print(_final_msg)  # we always want to display this.
         return self.x_k
 
+
 def sample_points(x, eps, N):
     """
     sample N points uniformly distributed in eps-ball around x
@@ -304,29 +331,30 @@ def sample_points(x, eps, N):
     dim = len(x)
     U = np.random.randn(N, dim)
     norm_U = np.linalg.norm(U, axis=1)
-    R = np.random.rand(N)**(1/dim)
-    Z = eps * (R/norm_U)[:,np.newaxis] * U
-    
+    R = np.random.rand(N) ** (1 / dim)
+    Z = eps * (R / norm_U)[:, np.newaxis] * U
+
     return x + Z
 
 
 def q_rho(d, rho, H, f_k, gI_k, gE_k, D_f, D_gI, D_gE):
-    term1 = rho* (f_k + np.max(D_f @ d))
-    
+    term1 = rho * (f_k + np.max(D_f @ d))
+
     term2 = 0
     for j in np.arange(len(D_gI)):
         term2 += np.maximum(gI_k[j] + D_gI[j] @ d, 0).max()
-    
+
     term3 = 0
     for l in np.arange(len(D_gE)):
         term3 += np.abs(gE_k[l] + D_gE[l] @ d).max()
-    
-    term4 = 0.5 * d.T@H@d
-    return term1+term2+term3+term4
+
+    term4 = 0.5 * d.T @ H @ d
+    return term1 + term2 + term3 + term4
+
 
 def phi_rho(x, f, gI, gE, rho):
-    term1 = rho*f.eval(x)
-    
+    term1 = rho * f.eval(x)
+
     # inequalities
     if len(gI) > 0:
         term2 = np.sum(np.hstack([np.maximum(gI[j].eval(x), 0) for j in range(len(gI))]))
@@ -337,71 +365,79 @@ def phi_rho(x, f, gI, gE, rho):
         term3 = np.sum(np.hstack([np.abs(gE[l].eval(x)) for l in range(len(gE))]))
     else:
         term3 = 0
-    
-    return term1+term2+term3
+
+    return term1 + term2 + term3
+
 
 def stop_criterion(gI, gE, g_k, SP, gI_k, gE_k, B_gI, B_gE, nI_, nE_, pI, pE):
     """
     computes E_k in the paper
     """
     val1 = np.linalg.norm(g_k, np.inf)
-    
+
     # as gI or gE could be empty, we need a max value for empty arrays --> initial argument
     val2 = np.max(gI_k, initial=-np.inf)
     val3 = np.max(np.abs(gE_k), initial=-np.inf)
-    
+
     gI_vals = list()
     for j in np.arange(nI_):
         gI_vals += eval_ineq(gI[j], B_gI[j])
-    
+
     val4 = -np.inf
     for j in np.arange(len(gI_vals)):
         val4 = np.maximum(val4, np.max(SP.lambda_gI[j] * gI_vals[j]))
-    
+
     gE_vals = list()
     for j in np.arange(nE_):
         gE_vals += eval_ineq(gE[j], B_gE[j])
-    
+
     val5 = -np.inf
     for j in np.arange(len(gE_vals)):
         val5 = np.maximum(val5, np.max(SP.lambda_gE[j] * gE_vals[j]))
-    
+
     return np.max(np.array([val1, val2, val3, val4, val5]))
 
+
 def eval_ineq(fun, X):
     """
     evaluate function at multiple inputs
     needed in stop_criterion
-    
+
     Returns
     -------
-    list of array, number of entries = fun.dimOut
+    list of array, number of entries = fun.dimOut
     """
     (N, _) = X.shape
     D = np.zeros((N, fun.dimOut))
     for i in np.arange(N):
-        D[i,:,] = fun.eval(X[i,:])
-    
-    return [D[:,j] for j in range(fun.dimOut)]
+        D[
+            i,
+            :,
+        ] = fun.eval(X[i, :])
+
+    return [D[:, j] for j in range(fun.dimOut)]
 
 
 def compute_gradients(fun, X):
-    """
+    """
     computes gradients of function object f at all rows of array X
-    
+
     Returns
     -------
     list of 2d-matrices, length of fun.dimOut
     """
     (N, dim) = X.shape
-    
+
     # fun.grad returns Jacobian, i.e. dimOut x dim
     D = np.zeros((N, fun.dimOut, dim))
     for i in np.arange(N):
-        D[i,:,:] = fun.grad(X[i,:])
-    
-    return [D[:,j,:] for j in np.arange(fun.dimOut)]
-#%%
+        D[i, :, :] = fun.grad(X[i, :])
+
+    return [D[:, j, :] for j in np.arange(fun.dimOut)]
+
+
+# %%
+
 
 class SubproblemSQPGS:
     def __init__(self, dim, p0, pI, pE, assert_tol):
@@ -411,128 +447,165 @@ def __init__(self, dim, p0, pI, pE, assert_tol):
        pI : array, number of sample points for inequality constraint (excluding x_k itself)
         pE : array, number of sample points for equality constraint (excluding x_k itself)
         """
-        
+
         self.dim = dim
         self.nI = len(pI)
         self.nE = len(pE)
 
         self.p0 = p0
         self.pI = pI
         self.pE = pE
-        
+
         self.assert_tol = assert_tol
 
         self.P, self.q, self.inG, self.inh, self.nonnegG, self.nonnegh = self.initialize()
-        
-        
+
     def solve(self):
         """
-        This solves the quadratic program. In every iteration, you should call self.update() before solving in order to have the correct subproblem data.
-        
+        This solves the quadratic program. In every iteration, you should call
+        self.update() before solving in order to have the correct subproblem data.
+
         self.d: array
             search direction
-        
+
         self.lambda_f: array
-            KKT multipier for objective.
-        
+            KKT multiplier for objective.
+
         self.lambda_gE: list
-            KKT multipier for equality constraints.
-        
+            KKT multiplier for equality constraints.
+
         self.lambda_gI: list
-            KKT multipier for inequality constraints.
+            KKT multiplier for inequality constraints.
         """
-        cx.solvers.options['show_progress'] = False
-        
+        cx.solvers.options["show_progress"] = False
+
         iG = np.vstack((self.inG, self.nonnegG))
         ih = np.hstack((self.inh, self.nonnegh))
-        
-        qp = cx.solvers.qp(P=cx.matrix(self.P), q=cx.matrix(self.q), G=cx.matrix(iG), h=cx.matrix(ih))
-        
+
+        qp = cx.solvers.qp(
+            P=cx.matrix(self.P), q=cx.matrix(self.q), G=cx.matrix(iG), h=cx.matrix(ih)
+        )
+
         self.status = qp["status"]
-        self.cvx_sol_x = np.array(qp['x']).squeeze()
-        
-        self.d = self.cvx_sol_x[:self.dim]
+        self.cvx_sol_x = np.array(qp["x"]).squeeze()
+
+        self.d = self.cvx_sol_x[: self.dim]
         self.z = self.cvx_sol_x[self.dim]
 
-        self.rI = self.cvx_sol_x[self.dim +1 : self.dim +1 +self.nI]
-        self.rE = self.cvx_sol_x[self.dim +1 + self.nI : ]
-        
+        self.rI = self.cvx_sol_x[self.dim + 1 : self.dim + 1 + self.nI]
+        self.rE = self.cvx_sol_x[self.dim + 1 + self.nI :]
+
         assert len(self.rE) == self.nE
-        assert np.all(self.rI >= -self.assert_tol) , f"Array should be non-negative, but minimal value is {np.min(self.rI)}."
-        assert np.all(self.rE >= -self.assert_tol), f"Array should be non-negative, but minimal value is {np.min(self.rE)}."
+        assert np.all(
+            self.rI >= -self.assert_tol
+        ), f"Array should be non-negative, but minimal value is {np.min(self.rI)}."
+        assert np.all(
+            self.rE >= -self.assert_tol
+        ), f"Array should be non-negative, but minimal value is {np.min(self.rE)}."
+
         # extract dual variables = KKT multipliers
-        self.cvx_sol_z = np.array(qp['z']).squeeze()
-        lambda_f = self.cvx_sol_z[:self.p0+1]
-        
+        self.cvx_sol_z = np.array(qp["z"]).squeeze()
+        lambda_f = self.cvx_sol_z[: self.p0 + 1]
+
         lambda_gI = list()
         for j in np.arange(self.nI):
-            start_ix = self.p0+1+(1+self.pI)[:j].sum()
-            lambda_gI.append( self.cvx_sol_z[start_ix : start_ix + 1+self.pI[j]] )
-        
+            start_ix = self.p0 + 1 + (1 + self.pI)[:j].sum()
+            lambda_gI.append(self.cvx_sol_z[start_ix : start_ix + 1 + self.pI[j]])
+
         lambda_gE = list()
         for j in np.arange(self.nE):
-            start_ix = self.p0+1+(1+self.pI).sum()+(1+self.pE)[:j].sum()
-            
+            start_ix = self.p0 + 1 + (1 + self.pI).sum() + (1 + self.pE)[:j].sum()
+
             # from ineq with +
-            vec1 = self.cvx_sol_z[start_ix : start_ix + 1+self.pE[j]]
-            
+            vec1 = self.cvx_sol_z[start_ix : start_ix + 1 + self.pE[j]]
+
             # from ineq with -
-            vec2 = self.cvx_sol_z[start_ix+(1+self.pE).sum() : start_ix + (1+self.pE).sum() + 1+self.pE[j]]
-            
+            vec2 = self.cvx_sol_z[
+                start_ix + (1 + self.pE).sum() : start_ix + (1 + self.pE).sum() + 1 + self.pE[j]
+            ]
+
             # see Direction.m line 620 in the original Matlab code
-            lambda_gE.append(vec1-vec2)
-        
+            lambda_gE.append(vec1 - vec2)
+
         self.lambda_f = lambda_f.copy()
         self.lambda_gI = lambda_gI.copy()
         self.lambda_gE = lambda_gE.copy()
-        
-        return
-    
-    
+
+        return
+
     def initialize(self):
         """
         The quadratic subproblem we solve in every iteration is of the form:
-        
+
        min_y 1/2 y^T P y + q^T y subject to Gy <= h
 
         variable structure: y=(d,z,rI,rE) with
         d = search direction
         z = helper variable for objective
         rI = helper variable for inequality constraints
        rE = helper variable for equality constraints
 
-        This function initializes the variables P,q,G,h. The entries which change in every iteration are then updated in self.update()
-        
+        This function initializes the variables P,q,G,h. The entries which
+        change in every iteration are then updated in self.update()
+
         G and h consist of two parts:
         1) inG, inh: the inequalities from the paper
         2) nonnegG, nonnegh: nonnegativity bounds rI >= 0, rE >= 0
         """
-        
-        dimQP = self.dim+1 + self.nI + self.nE
-        
+
+        dimQP = self.dim + 1 + self.nI + self.nE
+
         P = np.zeros((dimQP, dimQP))
         q = np.zeros(dimQP)
-        
-        inG = np.zeros((1 + self.p0+np.sum(1+self.pI) + 2*np.sum(1+self.pE), dimQP))
-        inh = np.zeros( 1 + self.p0+np.sum(1+self.pI) + 2*np.sum(1+self.pE))
-        
+
+        inG = np.zeros((1 + self.p0 + np.sum(1 + self.pI) + 2 * np.sum(1 + self.pE), dimQP))
+        inh = np.zeros(1 + self.p0 + np.sum(1 + self.pI) + 2 * np.sum(1 + self.pE))
+
         # structure of inG (p0+1, sum(1+pI), sum(1+pE), sum(1+pE))
-        inG[:self.p0+1, self.dim] = -1
-        
+        inG[: self.p0 + 1, self.dim] = -1
+
         for j in range(self.nI):
-            inG[self.p0+1+(1+self.pI)[:j].sum() : self.p0+1+(1+self.pI)[:j].sum() + self.pI[j]+1, self.dim+1+j] = -1
-        
+            inG[
+                self.p0 + 1 + (1 + self.pI)[:j].sum() : self.p0
+                + 1
+                + (1 + self.pI)[:j].sum()
+                + self.pI[j]
+                + 1,
+                self.dim + 1 + j,
+            ] = -1
+
         for j in range(self.nE):
-            inG[self.p0+1+(1+self.pI).sum()+(1+self.pE)[:j].sum() : self.p0+1+(1+self.pI).sum()+(1+self.pE)[:j].sum() + self.pE[j]+1, self.dim+1+self.nI+j] = -1
-            inG[self.p0+1+(1+self.pI).sum()+(1+self.pE).sum()+(1+self.pE)[:j].sum() : self.p0+1+(1+self.pI).sum()+(1+self.pE).sum()+(1+self.pE)[:j].sum() + self.pE[j]+1, self.dim+1+self.nI+j] = -1
-        
+            inG[
+                self.p0 + 1 + (1 + self.pI).sum() + (1 + self.pE)[:j].sum() : self.p0
+                + 1
+                + (1 + self.pI).sum()
+                + (1 + self.pE)[:j].sum()
+                + self.pE[j]
+                + 1,
+                self.dim + 1 + self.nI + j,
+            ] = -1
+            inG[
+                self.p0
+                + 1
+                + (1 + self.pI).sum()
+                + (1 + self.pE).sum()
+                + (1 + self.pE)[:j].sum() : self.p0
+                + 1
+                + (1 + self.pI).sum()
+                + (1 + self.pE).sum()
+                + (1 + self.pE)[:j].sum()
+                + self.pE[j]
+                + 1,
+                self.dim + 1 + self.nI + j,
+            ] = -1
+
         # we have nI+nE r-variables
-        nonnegG = np.hstack((np.zeros((self.nI + self.nE, self.dim + 1)), -np.eye(self.nI + self.nE)))
+        nonnegG = np.hstack(
+            (np.zeros((self.nI + self.nE, self.dim + 1)), -np.eye(self.nI + self.nE))
+        )
         nonnegh = np.zeros(self.nI + self.nE)
 
-        return P,q,inG,inh,nonnegG,nonnegh
+        return P, q, inG, inh, nonnegG, nonnegh
 
     def update(self, H, rho, D_f, D_gI, D_gE, f_k, gI_k, gE_k):
         """
@@ -555,31 +628,80 @@ def update(self, H, rho, D_f, D_gI, D_gE, f_k, gI_k, gE_k):
         evaluation of inequality constraints at x_k.
         gE_k : array
         evaluation of equality constraints at x_k.
-        
+
         Returns
         -------
         None.
""" - self.P[:self.dim, :self.dim] = H - self.q = np.hstack((np.zeros(self.dim), rho, np.ones(self.nI), np.ones(self.nE))) - - self.inG[:self.p0+1, :self.dim] = D_f - self.inh[:self.p0+1] = -f_k - + self.P[: self.dim, : self.dim] = H + self.q = np.hstack((np.zeros(self.dim), rho, np.ones(self.nI), np.ones(self.nE))) + + self.inG[: self.p0 + 1, : self.dim] = D_f + self.inh[: self.p0 + 1] = -f_k + for j in range(self.nI): - self.inG[self.p0+1+(1+self.pI)[:j].sum() : self.p0+1+(1+self.pI)[:j].sum() + self.pI[j]+1, :self.dim] = D_gI[j] - self.inh[self.p0+1+(1+self.pI)[:j].sum() : self.p0+1+(1+self.pI)[:j].sum() + self.pI[j]+1] = -gI_k[j] - - for j in range(self.nE): - self.inG[self.p0+1+(1+self.pI).sum()+(1+self.pE)[:j].sum() : self.p0+1+(1+self.pI).sum()+(1+self.pE)[:j].sum() + self.pE[j]+1, :self.dim] = D_gE[j] - self.inG[self.p0+1+(1+self.pI).sum()+(1+self.pE).sum()+(1+self.pE)[:j].sum() : self.p0+1+(1+self.pI).sum()+(1+self.pE).sum()+(1+self.pE)[:j].sum() + self.pE[j]+1, :self.dim] = -D_gE[j] - - self.inh[self.p0+1+(1+self.pI).sum()+(1+self.pE)[:j].sum() : self.p0+1+(1+self.pI).sum()+(1+self.pE)[:j].sum() + self.pE[j]+1] = -gE_k[j] - self.inh[self.p0+1+(1+self.pI).sum()+(1+self.pE).sum()+(1+self.pE)[:j].sum() : self.p0+1+(1+self.pI).sum()+(1+self.pE).sum()+(1+self.pE)[:j].sum() + self.pE[j]+1] = gE_k[j] - - - return - + self.inG[ + self.p0 + 1 + (1 + self.pI)[:j].sum() : self.p0 + + 1 + + (1 + self.pI)[:j].sum() + + self.pI[j] + + 1, + : self.dim, + ] = D_gI[j] + self.inh[ + self.p0 + 1 + (1 + self.pI)[:j].sum() : self.p0 + + 1 + + (1 + self.pI)[:j].sum() + + self.pI[j] + + 1 + ] = -gI_k[j] + for j in range(self.nE): + self.inG[ + self.p0 + 1 + (1 + self.pI).sum() + (1 + self.pE)[:j].sum() : self.p0 + + 1 + + (1 + self.pI).sum() + + (1 + self.pE)[:j].sum() + + self.pE[j] + + 1, + : self.dim, + ] = D_gE[j] + self.inG[ + self.p0 + + 1 + + (1 + self.pI).sum() + + (1 + self.pE).sum() + + (1 + self.pE)[:j].sum() : self.p0 + + 1 + + (1 + self.pI).sum() + + (1 + self.pE).sum() + + (1 + self.pE)[:j].sum() + + self.pE[j] + + 1, + : self.dim, + ] = -D_gE[j] + + self.inh[ + self.p0 + 1 + (1 + self.pI).sum() + (1 + self.pE)[:j].sum() : self.p0 + + 1 + + (1 + self.pI).sum() + + (1 + self.pE)[:j].sum() + + self.pE[j] + + 1 + ] = -gE_k[j] + self.inh[ + self.p0 + + 1 + + (1 + self.pI).sum() + + (1 + self.pE).sum() + + (1 + self.pE)[:j].sum() : self.p0 + + 1 + + (1 + self.pI).sum() + + (1 + self.pE).sum() + + (1 + self.pE)[:j].sum() + + self.pE[j] + + 1 + ] = gE_k[j] + return diff --git a/ncopt/torch_obj.py b/src/ncopt/torch_obj.py similarity index 61% rename from ncopt/torch_obj.py rename to src/ncopt/torch_obj.py index 3934ef6..9deb282 100755 --- a/ncopt/torch_obj.py +++ b/src/ncopt/torch_obj.py @@ -1,42 +1,46 @@ """ author: Fabian Schaipp """ -import numpy as np + import torch + class Net: - def __init__(self, D, dimOut = None): - self.name = 'pytorch_Net' + def __init__(self, D, dimOut=None): + self.name = "pytorch_Net" self.D = D - + self.D.zero_grad() - + self.dimIn = self.D[0].weight.shape[1] - + # set mode to evaluation self.D.train(False) - - #if type(self.D[-1]) == torch.nn.ReLU: + + # if type(self.D[-1]) == torch.nn.ReLU: if dimOut is None: print("Caution: output dimension of Net is not specified and derived from last module!") self.dimOut = self.D[-1].weight.shape[0] else: self.dimOut = dimOut return - - def eval(self, x): - assert len(x) == self.dimIn, f"Input for Net has wrong dimension, required dimension is {self.dimIn}." 
-
+
+    def eval(self, x):
+        assert (
+            len(x) == self.dimIn
+        ), f"Input for Net has wrong dimension, required dimension is {self.dimIn}."
+
         return self.D.forward(torch.tensor(x, dtype=torch.float32)).detach().numpy()
-    
+
     def grad(self, x):
-        assert len(x) == self.dimIn, f"Input for Net has wrong dimension, required dimension is {self.dimIn}."
-        
+        assert (
+            len(x) == self.dimIn
+        ), f"Input for Net has wrong dimension, required dimension is {self.dimIn}."
+
         x_torch = torch.tensor(x, dtype=torch.float32)
         x_torch.requires_grad_(True)
-        
+
         y_torch = self.D(x_torch)
         y_torch.backward()
 
         return x_torch.grad.data.numpy()
-    
\ No newline at end of file
diff --git a/ncopt/utils.py b/src/ncopt/utils.py
similarity index 85%
rename from ncopt/utils.py
rename to src/ncopt/utils.py
index 3e6465c..fb38bad 100644
--- a/ncopt/utils.py
+++ b/src/ncopt/utils.py
@@ -1,11 +1,11 @@
-import torch
-from torch.autograd.functional import jacobian
-from torch.func import vmap, jacrev, jacfwd, functional_call
-
 import logging
 from typing import Optional
 
-#%% Computing Jacobians
+import torch
+from torch.autograd.functional import jacobian
+from torch.func import functional_call, jacfwd, jacrev, vmap
+
+# %% Computing Jacobians
 """
 Important: jacobian and jacrev do the forward pass for each row of the input, that is, WITHOUT the batch dimension!
@@ -20,7 +20,7 @@
     specified with -1, and not with x.shape[0] or similar!
 * For the Jacobian, we get an extra dimension in such cases --> needs to be removed later on
-"""
+"""
 
 
 """
@@ -38,6 +38,7 @@
 * https://pytorch.org/tutorials/intermediate/jacobians_hessians.html
 """
 
+
 def compute_batch_jacobian_naive(model: torch.nn.Module, inputs: torch.Tensor):
     """
 
@@ -51,9 +52,11 @@ def compute_batch_jacobian_naive(model: torch.nn.Module, inputs: torch.Tensor):
     b = inputs.size(0)
     # want to have batch dimension --> double brackets
     return torch.stack([jacobian(model, inputs[i]) for i in range(b)])
-    
-def compute_batch_jacobian_vmap(model: torch.nn.Module, inputs: torch.Tensor,
-                                forward: bool=False):
+
+
+def compute_batch_jacobian_vmap(
+    model: torch.nn.Module, inputs: torch.Tensor, forward: bool = False
+):
     """
 
     Parameters
@@ -62,28 +65,29 @@ def compute_batch_jacobian_vmap(model: torch.nn.Module, inputs: torch.Tensor,
         The function of which to compute the Jacobian.
     inputs : torch.Tensor
         The inputs for model. First dimension should be batch dimension.
-    forward: bool.
+    forward: bool.
         Whether to compute in forward mode (jacrev or jacfwd).
         By default False.
""" params = dict(model.named_parameters()) - def fmodel(params, inputs): #functional version of model + def fmodel(params, inputs): # functional version of model return functional_call(model, params, inputs) # argnums specifies which argument to compute jacobian wrt # in_dims: dont map over params (None), map over first dim of inputs (0) if not forward: - return vmap(jacrev(fmodel, argnums=(1)), in_dims=(None,0))(params, inputs) + return vmap(jacrev(fmodel, argnums=(1)), in_dims=(None, 0))(params, inputs) else: - return vmap(jacfwd(fmodel, argnums=(1)), in_dims=(None,0))(params, inputs) + return vmap(jacfwd(fmodel, argnums=(1)), in_dims=(None, 0))(params, inputs) + -#%% +# %% # copied from https://github.com/aaronpmishkin/experiment_utils/blob/main/src/experiment_utils/utils.py#L298 def get_logger( name: str, - verbose: bool=False, - debug: bool=False, - log_file: Optional[str]=None, + verbose: bool = False, + debug: bool = False, + log_file: Optional[str] = None, ) -> logging.Logger: """Construct a logging.Logger instance with an appropriate configuration. diff --git a/ncopt/tests/test_jacobians.py b/tests/test_jacobians.py similarity index 62% rename from ncopt/tests/test_jacobians.py rename to tests/test_jacobians.py index f800973..23a6dd0 100644 --- a/ncopt/tests/test_jacobians.py +++ b/tests/test_jacobians.py @@ -1,8 +1,4 @@ import torch -import sys, os - -tests_path = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, tests_path + '/../..') from ncopt.utils import compute_batch_jacobian_naive, compute_batch_jacobian_vmap @@ -10,80 +6,82 @@ m = 3 b = 4 + class Quadratic(torch.nn.Module): def __init__(self, input_dim): super().__init__() self.A = torch.nn.Parameter(torch.randn((input_dim, input_dim))) def forward(self, x): - return (1/2) * x @ self.A @ x + return (1 / 2) * x @ self.A @ x class DummyNet(torch.nn.Module): def __init__(self, d=7, C=1, num_classes=10): super(DummyNet, self).__init__() - + self.conv = torch.nn.Conv2d(1, 8, kernel_size=5, stride=1, padding=2) - self.linear_input_dim = 8*C*d*d - self.linear = torch.nn.Linear(self.linear_input_dim, num_classes) + self.linear_input_dim = 8 * C * d * d + self.linear = torch.nn.Linear(self.linear_input_dim, num_classes) def forward(self, x): x = torch.nn.functional.relu(self.conv(x)) # x = x.view(x.shape[0], -1) # This would result in errors when computing Jacobian. - x = x.view(-1, self.linear_input_dim) # Batch dimension specified by -1. + x = x.view(-1, self.linear_input_dim) # Batch dimension specified by -1. 
         x = self.linear(x)
         return x
-    
+
+
 ######################################################
 #### Tests start here
 
+
 def test_quadratic_jacobian():
     torch.manual_seed(1)
 
-    inputs = torch.randn(b,d)
+    inputs = torch.randn(b, d)
     model = Quadratic(d)
 
     # f(x) = 0.5 x^T A x --> Df(x) = 0.5*(A+A.T)x
-    expected = 0.5 * inputs @ (model.A.T + model.A)
+    expected = 0.5 * inputs @ (model.A.T + model.A)
 
-    jac1 = compute_batch_jacobian_naive(model, inputs)
-    jac2 = compute_batch_jacobian_vmap(model, inputs)
+    jac1 = compute_batch_jacobian_naive(model, inputs)
+    jac2 = compute_batch_jacobian_vmap(model, inputs)
 
     assert torch.allclose(expected, jac1, rtol=1e-5, atol=1e-5)
     assert torch.allclose(jac1, jac2, rtol=1e-5, atol=1e-5)
 
     return
 
+
 def test_forward_backward_jacobian():
     torch.manual_seed(1)
 
-    inputs = torch.randn(b,d)
+    inputs = torch.randn(b, d)
     model = Quadratic(d)
 
     # f(x) = 0.5 x^T A x --> Df(x) = 0.5*(A+A.T)x
-    expected = 0.5 * inputs @ (model.A.T + model.A)
+    expected = 0.5 * inputs @ (model.A.T + model.A)
 
-    jac1 = compute_batch_jacobian_vmap(model, inputs, forward=False)
-    jac2 = compute_batch_jacobian_vmap(model, inputs, forward=True)
+    jac1 = compute_batch_jacobian_vmap(model, inputs, forward=False)
+    jac2 = compute_batch_jacobian_vmap(model, inputs, forward=True)
 
     assert torch.allclose(expected, jac1, rtol=1e-5, atol=1e-5)
     assert torch.allclose(jac1, jac2, rtol=1e-5, atol=1e-5)
 
     return
 
-def test_multidim_output():
-    model = torch.nn.Sequential(torch.nn.Linear(d,m),
-                                torch.nn.Softmax(dim=-1))
 
-    inputs = torch.randn(b,d)
+def test_multidim_output():
+    model = torch.nn.Sequential(torch.nn.Linear(d, m), torch.nn.Softmax(dim=-1))
+
+    inputs = torch.randn(b, d)
 
     output = model(inputs)
-    assert output.shape == torch.Size([b,m])
+    assert output.shape == torch.Size([b, m])
 
     jac1 = compute_batch_jacobian_naive(model, inputs)
     jac2 = compute_batch_jacobian_vmap(model, inputs)
-    
-    assert jac1.shape == torch.Size([b,m,d])
-    assert jac2.shape == torch.Size([b,m,d])
+
+    assert jac1.shape == torch.Size([b, m, d])
+    assert jac2.shape == torch.Size([b, m, d])
 
     assert torch.allclose(jac1, jac2, rtol=1e-5, atol=1e-5)
 
@@ -91,28 +89,22 @@ def test_multidim_output():
 
 
 def test_multidim_output_multiaxis_input():
-    
     pixel = 7
     channel = 1
     num_classes = 9
 
-    input_dim = (channel,pixel,pixel)
-    inputs = torch.randn(b,*input_dim)
-    
+    input_dim = (channel, pixel, pixel)
+    inputs = torch.randn(b, *input_dim)
+
     model = DummyNet(d=pixel, C=channel, num_classes=num_classes)
 
     output = model(inputs)
-    
-    assert output.shape == torch.Size([b,num_classes])
+
+    assert output.shape == torch.Size([b, num_classes])
 
     jac1 = compute_batch_jacobian_naive(model, inputs)
     jac2 = compute_batch_jacobian_vmap(model, inputs)
 
     # this case has an extra dimension, due to the view operation
-    assert jac1.shape == torch.Size([b,1,num_classes,*input_dim])
-    assert jac2.shape == torch.Size([b,1,num_classes,*input_dim])
+    assert jac1.shape == torch.Size([b, 1, num_classes, *input_dim])
+    assert jac2.shape == torch.Size([b, 1, num_classes, *input_dim])
 
     assert torch.allclose(jac1, jac2, rtol=1e-5, atol=1e-5)
-    
-    
-    
-    
-    
diff --git a/ncopt/tests/test_rosenbrock.py b/tests/test_rosenbrock.py
similarity index 74%
rename from ncopt/tests/test_rosenbrock.py
rename to tests/test_rosenbrock.py
index 9333c4d..dcf97dd 100755
--- a/ncopt/tests/test_rosenbrock.py
+++ b/tests/test_rosenbrock.py
@@ -3,31 +3,29 @@
 """
 
 import numpy as np
-import sys, os
 
-tests_path = os.path.dirname(os.path.abspath(__file__))
-sys.path.insert(0, tests_path + '/../..')
-
-from ncopt.sqpgs import SQPGS
-from ncopt.funs import f_rosenbrock, g_max, g_linear
+from ncopt.funs import f_rosenbrock, g_linear, g_max
+from ncopt.sqpgs.main import SQPGS
 
 f = f_rosenbrock()
 g = g_max()
 
+
 def test_rosenbrock_from_zero():
     gI = [g]
     gE = []
-    xstar = np.array([1/np.sqrt(2), 0.5])
+    xstar = np.array([1 / np.sqrt(2), 0.5])
     problem = SQPGS(f, gI, gE, tol=1e-8, max_iter=200, verbose=False)
     x_k = problem.solve()
     np.testing.assert_array_almost_equal(x_k, xstar, decimal=4)
 
     return
 
+
 def test_rosenbrock_from_rand():
     gI = [g]
     gE = []
-    xstar = np.array([1/np.sqrt(2), 0.5])
+    xstar = np.array([1 / np.sqrt(2), 0.5])
     x0 = np.random.rand(2)
     problem = SQPGS(f, gI, gE, x0=x0, tol=1e-8, max_iter=200, verbose=False)
     x_k = problem.solve()
@@ -35,6 +33,7 @@ def test_rosenbrock_from_rand():
 
     return
 
+
 def test_rosenbrock_with_eq():
     g1 = g_linear(A=np.eye(2), b=np.ones(2))
     gI = []
@@ -44,4 +43,4 @@ def test_rosenbrock_with_eq():
     x_k = problem.solve()
     np.testing.assert_array_almost_equal(x_k, xstar, decimal=4)
 
-    return
\ No newline at end of file
+    return