import numpy as np
import matplotlib.pyplot as plt
np.random.seed(228)
# Parameters
n = 100 # Dimension of x
mu = 10
L = 100
def generate_problem(n=100, mu=mu, L=L, problem_type="clustered"):
    np.random.seed(228)
    if problem_type == "clustered":
        # Four clusters of eigenvalues between mu and L (assumes n is divisible by 4)
        A = np.diagflat([mu*np.ones(n//4), (mu + (L-mu)/3)*np.ones(n//4),
                         (mu + 2*(L-mu)/3)*np.ones(n//4), L*np.ones(n//4)])
        # Rotate by a random orthogonal matrix to get a dense SPD matrix with the same spectrum
        U = np.random.rand(n, n)
        Q, _ = np.linalg.qr(U)
        A = Q.dot(A).dot(Q.T)
        A = (A + A.T) * 0.5
        x_opt = np.random.rand(n)
        b = A @ x_opt
        x_0 = 5*np.random.randn(n)
    elif problem_type == "random":
        A = np.random.randn(n, n)
        factual_L = max(np.linalg.eigvalsh(A.T @ A))
        A = A.T.dot(A)/factual_L*L + mu*np.eye(n)
        x_opt = np.random.rand(n)
        b = A @ x_opt
        x_0 = 3*np.random.randn(n)
    elif problem_type == "uniform spectrum":
        A = np.diag(np.linspace(mu, L, n, endpoint=True))
        x_opt = np.random.rand(n)
        b = A @ x_opt
        x_0 = 3*np.random.randn(n)
    elif problem_type == "Hilbert":
        A = np.array([[1.0 / (i + j - 1) for i in range(1, n+1)] for j in range(1, n+1)])
        b = np.ones(n)
        x_0 = 3*np.random.randn(n)
        x_opt = np.linalg.lstsq(A, b, rcond=None)[0]
    elif problem_type == "worst_cg":
        # Parameter t controls the condition number
        t = 0.6  # Small t leads to worse conditioning
        # Create tridiagonal matrix W
        main_diag = np.ones(n)
        main_diag[0] = t
        main_diag[1:] = 1 + t
        off_diag = np.sqrt(t) * np.ones(n-1)
        A = np.diag(main_diag) + np.diag(off_diag, k=1) + np.diag(off_diag, k=-1)
        # Create b vector [1, 0, ..., 0]
        b = np.zeros(n)
        b[0] = 1
        # Since this is a specific problem, we compute x_opt explicitly
        x_opt = np.linalg.solve(A, b)
        x_0 = np.zeros(n)  # Start from zero vector
    return A, b, x_0, x_opt
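A quick, optional sanity check (the small n and the "clustered" type here are arbitrary choices): the extreme eigenvalues of the generated matrix should match mu and L, and b is built so that x_opt solves Ax = b.
A, b, x_0, x_opt = generate_problem(n=8, mu=10, L=100, problem_type="clustered")
eigs = np.linalg.eigvalsh(A)
print(eigs.min(), eigs.max())      # approximately 10 and 100 by construction
print(np.allclose(A @ x_opt, b))   # True, since b was built as A @ x_opt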
# Optimization methods
def gradient_descent(f, grad_f, x_0, step_size, iterations, x_opt):
    x = x_0.copy()
    f_opt = f(x_opt)
    values, gradients = [], []
    values.append(abs(f(x) - f_opt))
    gradients.append(np.linalg.norm(grad_f(x)))
    for _ in range(iterations):
        x -= step_size * grad_f(x)
        values.append(abs(f(x) - f_opt))
        gradients.append(np.linalg.norm(grad_f(x)))
    return values, gradients
def steepest_descent(A, f, grad_f, x_0, iterations, x_opt):
    x = x_0.copy()
    f_opt = f(x_opt)
    values, gradients = [], []
    values.append(abs(f(x) - f_opt))
    gradients.append(np.linalg.norm(grad_f(x)))
    for _ in range(iterations):
        grad = grad_f(x)
        # Exact line search for a quadratic: alpha = (g^T g) / (g^T A g)
        step_size = np.dot(grad, grad) / np.dot(grad, np.dot(A, grad))
        x -= step_size * grad
        values.append(abs(f(x) - f_opt))
        gradients.append(np.linalg.norm(grad_f(x)))  # gradient at the updated point
    return values, gradients
def conjugate_gradient(A, b, x_0, iterations, x_opt):
    x = x_0.copy()
    f = lambda x: 0.5 * x.T @ A @ x - b.T @ x
    f_opt = f(x_opt)
    r = b - np.dot(A, x)   # residual; equals -grad f(x)
    d = r.copy()           # initial search direction
    values, gradients = [abs(f(x) - f_opt)], [np.linalg.norm(r)]
    for _ in range(iterations - 1):
        ## YOUR CODE HERE ##
        pass
    return values, gradients
def run_experiment(params):
    A, b, x_0, x_opt = generate_problem(n=params["n"], mu=params["mu"], L=params["L"],
                                        problem_type=params["problem_type"])
    eigs = np.linalg.eigvalsh(A)
    mu, L = min(eigs), max(eigs)   # actual extreme eigenvalues of the generated matrix
    f = lambda x: 0.5 * x.T @ A @ x - b.T @ x
    grad_f = lambda x: A @ x - b
    if mu <= 1e-2:
        alpha = 1/L          # Step size for the (nearly) non-strongly convex case
    else:
        alpha = 2/(mu + L)   # Optimal constant step size for strongly convex quadratics
    beta = (np.sqrt(L) - np.sqrt(mu))/(np.sqrt(L) + np.sqrt(mu))  # Momentum parameter (not used below)
    results = {
        "methods": {
            "Gradient Descent": gradient_descent(f, grad_f, x_0, alpha, params["iterations"], x_opt),
            "Steepest Descent": steepest_descent(A, f, grad_f, x_0, params["iterations"], x_opt),
            "Conjugate Gradients": conjugate_gradient(A, b, x_0, params["iterations"], x_opt),
        },
        "problem": {
            "eigs": eigs,
            "params": params
        }
    }
    return results
def plot_results(results):
    linestyles = {
        "Gradient Descent": "r-",
        "Steepest Descent": "b-.",
        "Conjugate Gradients": "g--"
    }
    plt.figure(figsize=(10, 3.5))
    mu = results["problem"]["params"]["mu"]
    L = results["problem"]["params"]["L"]
    n = results["problem"]["params"]["n"]
    problem_type = results["problem"]["params"]["problem_type"]
    if mu > 1e-2:
        plt.suptitle(f"Strongly convex quadratics. n={n}, {problem_type} matrix.")
    else:
        plt.suptitle(f"Convex quadratics. n={n}, {problem_type} matrix.")
    plt.subplot(1, 3, 1)
    eigs = results["problem"]["eigs"]
    plt.scatter(np.arange(len(eigs)), eigs)
    plt.xlabel('Dimension')
    plt.ylabel('Eigenvalues of A')
    plt.grid(linestyle=":")
    plt.title("Eigenvalues")
    if results["problem"]["params"]["problem_type"] == "Hilbert":
        plt.yscale("log")
    plt.subplot(1, 3, 2)
    for method, result_ in results["methods"].items():
        plt.semilogy(result_[0], linestyles[method])
    plt.xlabel('Iteration')
    plt.ylabel(r'$|f(x) - f^*|$')
    plt.grid(linestyle=":")
    plt.title("Function gap")
    plt.subplot(1, 3, 3)
    for method, result_ in results["methods"].items():
        plt.semilogy(result_[1], linestyles[method], label=method)
    plt.xlabel('Iteration')
    plt.ylabel(r'$\|\nabla f(x)\|_2$')
    plt.grid(linestyle=":")
    plt.title("Norm of Gradient")
    # Place the legend below the plots
    plt.figlegend(loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.00))
    # Adjust layout to make space for the legend below
    plt.tight_layout(rect=[0, 0.05, 1, 1])
    # plt.savefig(f"cg_{problem_type}_{mu}_{L}_{n}.pdf")
    plt.show()

We compare gradient descent, steepest descent, and conjugate gradients on the quadratic minimization problem

$$f(x)=\dfrac{1}{2}x^T A x - b^T x \longrightarrow \min_{x\in\mathbb{R}^n}.$$
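For this objective the gradient is affine, which is exactly what grad_f in run_experiment() computes, and the exact line-search step used in steepest_descent() has the closed form

$$\nabla f(x) = Ax - b, \qquad \alpha_k = \arg\min_{\alpha>0} f\big(x_k - \alpha \nabla f(x_k)\big) = \frac{\nabla f(x_k)^T \nabla f(x_k)}{\nabla f(x_k)^T A\, \nabla f(x_k)}.$$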
Add the code for the iterations of the conjugate gradient method in the function conjugate_gradient() above.
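If you want something to check your solution against, one standard way to complete the loop body is sketched below. This is only a sketch: the helper name conjugate_gradient_reference is ours (to avoid overwriting the exercise function), it reuses the np import from the top of the file, and any CG variant that tracks the residual r = b - A x is equally valid. In exact arithmetic the method converges in at most n steps.
def conjugate_gradient_reference(A, b, x_0, iterations, x_opt):
    # Mirrors the signature and logging of conjugate_gradient() above
    x = x_0.copy()
    f = lambda y: 0.5 * y.T @ A @ y - b.T @ y
    f_opt = f(x_opt)
    r = b - A @ x                        # residual; equals -grad f(x)
    d = r.copy()                         # first search direction
    values, gradients = [abs(f(x) - f_opt)], [np.linalg.norm(r)]
    for _ in range(iterations - 1):
        Ad = A @ d
        alpha = (r @ r) / (d @ Ad)       # exact step length along d
        x = x + alpha * d                # update the iterate
        r_new = r - alpha * Ad           # updated residual b - A x
        beta = (r_new @ r_new) / (r @ r) # standard CG coefficient
        d = r_new + beta * d             # next A-conjugate search direction
        r = r_new
        values.append(abs(f(x) - f_opt))
        gradients.append(np.linalg.norm(r))
    return values, gradients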
After implementing the conjugate gradient method, let's look at several experimental results of solving the quadratic problem for different matrices A:
# Experiment parameters
params = {
    "n": 60,
    "mu": 1e-3,
    "L": 100,
    "iterations": 100,
    "problem_type": "random",
}
results = run_experiment(params)
plot_results(results)

# Experiment parameters
params = {
    "n": 60,
    "mu": 10,
    "L": 100,
    "iterations": 100,
    "problem_type": "random",
}
results = run_experiment(params)
plot_results(results)

# Experiment parameters
params = {
    "n": 60,
    "mu": 10,
    "L": 1000,
    "iterations": 500,
    "problem_type": "clustered",
}
results = run_experiment(params)
plot_results(results)

# Experiment parameters
params = {
    "n": 600,
    "mu": 10,
    "L": 1000,
    "iterations": 500,
    "problem_type": "clustered",
}
results = run_experiment(params)
plot_results(results)

# Experiment parameters
params = {
    "n": 60,
    "mu": 1,
    "L": 10,
    "iterations": 100,
    "problem_type": "Hilbert",
}
results = run_experiment(params)
plot_results(results)
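One problem type from generate_problem(), "worst_cg", is not exercised above. If you want to try it, a run in the same style would look like the following; the parameter values here are only illustrative, since mu and L are ignored when that matrix is built and run_experiment() recomputes the true extreme eigenvalues anyway.
# Experiment parameters
params = {
    "n": 60,
    "mu": 10,
    "L": 100,
    "iterations": 100,
    "problem_type": "worst_cg",
}
results = run_experiment(params)
plot_results(results)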