Figure 6.8: Nonconvex function optimized with the distributed gradient algorithm.

Code for Figure 6.8

distributedgradient.py


# ustar, uiter = distributedgradient(V, u0, ulb, uub, **kwargs)
#
# Runs a distributed gradient algorithm for a problem with box constraints.
#
# V should be a (CasADi-compatible) function defining the objective. u0 should
# be the starting point. ulb and uub should give the box constraints for u.
#
# Additional arguments are passed as keyword arguments. They are as follows:
#
# - 'beta', 'sigma' : parameters for the backtracking line search
#                     (defaults 0.8 and 0.01)
# - 'alphamax' : maximum step to take in any iteration (default 15)
# - 'Niter' : maximum number of iterations (default 50)
# - 'steptol', 'gradtol' : absolute tolerances on step size and gradient.
#                          The algorithm terminates if the step size is less
#                          than steptol and the gradient is less than gradtol
#                          (defaults 1e-5 and 1e-6). Set either to a negative
#                          value to never stop early.
# - 'Nlinesearch' : maximum number of backtracking steps (default 100)
# - 'I' : list of index lists defining the variables for each subsystem.
#         By default, each variable is a separate subsystem.
#
# Return values are ustar, the final point, and uiter, a matrix of iteration
# progress (one column per iterate).
import numpy as np
from casadi import SX, jacobian, Function

def distributedgradient(V, u0, ulb, uub, **kwargs):
    """
    Runs a distributed gradient algorithm for a problem with box constraints.
    """
    # Get options
    param = {
        'beta': 0.8,
        'sigma': 0.01,
        'alphamax': 15,
        'Niter': 50,
        'steptol': 1e-5,
        'gradtol': 1e-6,
        'Nlinesearch': 100,
        'I': None
    }
    param.update(kwargs)

    Nu = len(u0)
    if param['I'] is None:
        param['I'] = [[i] for i in range(Nu)]

    # Project initial condition to feasible space
    u0 = np.minimum(np.maximum(u0, ulb), uub)

    # Get gradient function via CasADi
    usym = SX.sym('u', Nu)
    vsym = V(usym)
    Jsym = jacobian(vsym, usym)
    dVcasadi = Function('V', [usym], [Jsym])

    # Start the loop
    k = 0
    uiter = np.full((Nu, param['Niter'] + 1), np.nan)
    uiter[:,0] = u0
    g, gred = calcgrad(dVcasadi, u0, ulb, uub)
    v = np.full(Nu, np.inf)

    while k < param['Niter'] and (np.linalg.norm(gred) > param['gradtol'] or np.linalg.norm(v) > param['steptol']):
        # Take step
        uiter[:,k+1], v = coopstep(param['I'], V, g, uiter[:,k], ulb, uub, param)
        k += 1

        # Update gradient
        g, gred = calcgrad(dVcasadi, uiter[:,k], ulb, uub)

    # Get final point and strip off extra points
    uiter = uiter[:,:k+1]
    ustar = uiter[:,-1]

    return ustar, uiter

def coopstep(I, V, g, u0, ulb, uub, param):
    """
    Takes one step of cooperative distributed gradient projection.
    """
    Nu = len(u0)
    Ni = len(I)
    us = np.full((Nu, Ni), np.nan)
    Vs = np.full(Ni, np.nan)

    # Run each optimization
    V0 = float(V(u0))
    for j in range(Ni):
        # Compute step
        v = np.zeros(Nu)
        i = I[j]
        v[i] = -g[i]
        ubar = np.minimum(np.maximum(u0 + v, ulb), uub)
        v = ubar - u0

        # Choose alpha
        if np.any(ubar == ulb) or np.any(ubar == uub):
            alpha = 1.0
        else:
            with np.errstate(divide='ignore'):
                alpha_lb = (ulb[i] - u0[i]) / v[i]
                alpha_ub = (uub[i] - u0[i]) / v[i]
            alpha = np.concatenate([alpha_lb, alpha_ub])
            alpha = alpha[np.isfinite(alpha) & (alpha >= 0)]
            if len(alpha) == 0:
                alpha = 0.0
            else:
                alpha = min(np.min(alpha), param['alphamax'])

        # Backtracking line search
        k = 1
        while float(V(u0 + alpha * v)) > V0 + param['sigma'] * alpha * np.dot(g[i].flatten(), v[i].flatten()) \
              and k <= param['Nlinesearch']:
            alpha = param['beta'] * alpha
            k += 1

        us[:, j] = u0 + alpha * v
        Vs[j] = float(V(us[:, j]))

    # Choose step to take
    w = np.ones(Ni) / Ni
    for j in range(Ni):
        # Check for descent
        u = us @ w
        if float(V(u)) <= np.dot(Vs, w):
            break
        elif j == Ni - 1:
            u = u0.copy()
            print('Warning: No descent directions!')
            break

        # Get rid of worst player
        jmax = np.argmax(Vs)
        Vs[jmax] = -np.finfo(float).max
        w[jmax] = 0
        w = w / np.sum(w)

    # Calculate actual step
    v = u - u0
    return u, v


def calcgrad(dV, u, ulb, uub):
    """
    Calculates gradient and reduced gradient at a given point.
    """
    g = np.array(dV(u)).flatten()
    gred = g.copy()
    gred[(u == ulb) & (g > 0)] = 0
    gred[(u == uub) & (g < 0)] = 0
    return g, gred
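A minimal usage sketch (not part of the distributed files; the quadratic objective, bounds, and partition below are made up for illustration), assuming distributedgradient.py is importable:

import numpy as np
from distributedgradient import distributedgradient

def Vquad(u):
    # Separable quadratic with unconstrained minimizer (2, -1); any
    # CasADi-compatible expression of u works here.
    return (u[0] - 2)**2 + (u[1] + 1)**2

# Box constraints 0 <= u <= 3 cut off the minimizer in the second coordinate,
# so the algorithm should return approximately [2, 0].
ustar, uiter = distributedgradient(Vquad, np.zeros(2), np.zeros(2),
                                   3*np.ones(2), I=[[0], [1]])
print(ustar)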

main.py


# Example of nonconvex optimization using cooperative optimization.
# [depends] functions/distributedgradient.py
import sys
import os
import matplotlib.pyplot as plt
import numpy as np
import casadi as ca

sys.path.append('functions/')
from distributedgradient import distributedgradient
from stackcontours import stackcontours
from misc import gnuplotsave

nxy = 221
xvec = np.linspace(-0.5, 5, nxy)
yvec = np.linspace(-0.5, 5, nxy)
ncont = 15

def V_numpy(x, y):
    a = 1.1
    b = 0.4
    return (np.exp(-2*x) - 2*np.exp(-x) + np.exp(-2*y) - 2*np.exp(-y)
            + a*np.exp(-b*((x + 0.2)**2 + (y + 0.2)**2)))

def Vfunc(u):
    x, y = u[0], u[1]
    a = 1.1
    b = 0.4
    return (ca.exp(-2*x) - 2*ca.exp(-x) + ca.exp(-2*y) - 2*ca.exp(-y)
            + a*ca.exp(-b*((x + 0.2)**2 + (y + 0.2)**2)))

xgrid, ygrid = np.meshgrid(xvec, yvec)
Vgrid = V_numpy(xgrid, ygrid)

# Run distributed optimization for different initial conditions.
ulb = 0.1*np.ones(2)
uub = 4.0*np.ones(2)
U = np.array([[uub[0], ulb[0], ulb[0], uub[0], uub[0]],
              [uub[1], uub[1], ulb[1], ulb[1], uub[1]]])

Nu = 2
u0s = np.array([[0.5, 0.5], [3.9, 3.6], [2.99, 3.0]]).T
Nu0 = u0s.shape[1]
uiter = [None]*Nu0
for i in range(Nu0):
    _, uiter[i] = distributedgradient(Vfunc, u0s[:,i], ulb, uub)

# Make a plot.
if not os.getenv('OMIT_PLOTS') == 'true':
    plt.figure()
    cs = plt.contour(xvec, yvec, Vgrid, ncont)
    plt.plot(U[0,:], U[1,:], '-k')
    markers = 'os^<>v'
    for i in range(Nu0):
        plt.plot(uiter[i][0,:], uiter[i][1,:], f'-{markers[i]}k')
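As listed, the script only draws the contours and iterates on screen. A possible wrap-up, assuming nothing beyond matplotlib (the axis labels and output filename are arbitrary choices, not from the original script), is to label the axes and save the figure:

if not os.getenv('OMIT_PLOTS') == 'true':
    plt.xlabel('u_1')
    plt.ylabel('u_2')
    plt.savefig('distributedgradient.pdf')  # arbitrary output name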