import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets as skldata
import jax
from jax import numpy as jnp
from scipy.optimize import minimize_scalar
np.random.seed(228)
def generate_problem(m=1000, n=300, mu=1):
    # Generate synthetic data for a binary classification problem
    # m - number of samples, n - number of features, mu - regularization strength
    X, y = skldata.make_classification(n_classes=2, n_features=n, n_samples=m, n_informative=n//3, random_state=0)
    X = jnp.array(X)
    # make_classification returns labels in {0, 1}; map them to {-1, +1}
    # to match the logistic loss below
    y = jnp.array(2 * y - 1)
    # Regularized logistic regression cost function
    @jax.jit
    def f(w):
        return mu / 2 * jnp.linalg.norm(w)**2 + jnp.mean(jnp.logaddexp(0.0, -y * (X @ w)))
    grad_f = jax.jit(jax.grad(f))
    x_0 = jax.random.normal(jax.random.PRNGKey(0), (n,))
    return f, grad_f, x_0
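# Quick sanity check (a minimal sketch with hypothetical names, assuming generate_problem() above):
# compare the autodiff gradient with a central finite-difference estimate along a random direction.
f_chk, grad_chk, w_chk = generate_problem()
v = jax.random.normal(jax.random.PRNGKey(1), w_chk.shape)
eps = 1e-2
fd = (f_chk(w_chk + eps * v) - f_chk(w_chk - eps * v)) / (2 * eps)  # finite-difference directional derivative
ad = grad_chk(w_chk) @ v                                            # autodiff directional derivative
print(float(fd), float(ad))  # the two values should agree to a few digits (float32 precision)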
# Optimization methods
def gradient_descent(f, grad_f, x_0, step_size, iterations):
    x = x_0.copy()
    values, gradients = [], []
    values.append(f(x))
    gradients.append(np.linalg.norm(grad_f(x)))
    for _ in range(iterations):
        x -= step_size * grad_f(x)
        values.append(f(x))
        gradients.append(np.linalg.norm(grad_f(x)))
    return values, gradients
def steepest_descent(f, grad_f, x_0, iterations):
    x = x_0.copy()
    values, gradients = [], []
    values.append(f(x))
    gradients.append(np.linalg.norm(grad_f(x)))
    for _ in range(iterations):
        grad = grad_f(x)
        # Line search for the step size along the antigradient direction
        res = minimize_scalar(lambda alpha: f(x - alpha * grad), bounds=(1e-8, 1e1), method='Bounded', options={'maxiter': 50})
        step_size = res.x
        x -= step_size * grad
        values.append(f(x))
        # Record the gradient norm at the new point
        gradients.append(np.linalg.norm(grad_f(x)))
    return values, gradients
def ConjugateGradientFR(f, grad_f, x0, iterations, restart=False):
    x = x0
    grad = grad_f(x)
    values, gradients = [], []
    values.append(f(x))
    gradients.append(np.linalg.norm(grad))
    d = -grad
    it = 0
    while it < iterations:
        # Line search for the optimal alpha along the conjugate direction
        res = minimize_scalar(lambda alpha: f(x + alpha * d), bounds=(1e-9, 1e1), method='Bounded', options={'maxiter': 50})
        alpha = res.x
        x = x + alpha * d
        grad_next = grad_f(x)
        values.append(f(x))
        gradients.append(np.linalg.norm(grad_next))
        # Fletcher-Reeves formula for beta
        beta = grad_next.dot(grad_next) / grad.dot(grad)
        d = -grad_next + beta * d
        grad = grad_next
        it += 1
        # Periodically restart with the steepest descent direction
        if restart and it % restart == 0:
            d = -grad
    return values, gradients
def ConjugateGradientPR(f, grad_f, x0, iterations, restart=False):
    x = x0
    grad = grad_f(x)
    values, gradients = [], []
    values.append(f(x))
    gradients.append(np.linalg.norm(grad))
    d = -grad
    it = 0
    while it < iterations:
        # Line search for the optimal alpha along the conjugate direction
        res = minimize_scalar(lambda alpha: f(x + alpha * d), bounds=(1e-9, 1e1), method='Bounded', options={'maxiter': 50})
        alpha = res.x
        x = x + alpha * d
        grad_next = grad_f(x)
        values.append(f(x))
        gradients.append(np.linalg.norm(grad_next))
        # Polak-Ribière formula for beta
        beta = grad_next.dot(grad_next - grad) / grad.dot(grad)
        d = -grad_next + beta * d
        grad = grad_next
        it += 1
        # Periodically restart with the steepest descent direction
        if restart and it % restart == 0:
            d = -grad
    return values, gradients
def run_experiment(params):
    f, grad_f, x_0 = generate_problem(n=params["n"], m=params["m"], mu=params["mu"])
    if params["restart"] is None:
        results = {
            "methods": {
                "Gradient Descent": gradient_descent(f, grad_f, x_0, params["alpha"], params["iterations"]),
                "Steepest Descent": steepest_descent(f, grad_f, x_0, params["iterations"]),
                "Conjugate Gradients PR": ConjugateGradientPR(f, grad_f, x_0, params["iterations"]),
                "Conjugate Gradients FR": ConjugateGradientFR(f, grad_f, x_0, params["iterations"]),
            },
            "problem": {
                "params": params
            }
        }
    else:
        results = {
            "methods": {
                "Gradient Descent": gradient_descent(f, grad_f, x_0, params["alpha"], params["iterations"]),
                "Steepest Descent": steepest_descent(f, grad_f, x_0, params["iterations"]),
                "Conjugate Gradients PR": ConjugateGradientPR(f, grad_f, x_0, params["iterations"]),
                f"Conjugate Gradients PR. restart {params['restart']}": ConjugateGradientPR(f, grad_f, x_0, params["iterations"], restart=params["restart"]),
                "Conjugate Gradients FR": ConjugateGradientFR(f, grad_f, x_0, params["iterations"]),
                f"Conjugate Gradients FR. restart {params['restart']}": ConjugateGradientFR(f, grad_f, x_0, params["iterations"], restart=params["restart"]),
            },
            "problem": {
                "params": params
            }
        }
    return results
def plot_results(results):
    restart = results["problem"]["params"]["restart"]
    m = results["problem"]["params"]["m"]
    mu = results["problem"]["params"]["mu"]
    n = results["problem"]["params"]["n"]
    linestyles = {
        "Gradient Descent": "r-",
        "Steepest Descent": "b-.",
        "Conjugate Gradients FR": "g--",
        f"Conjugate Gradients FR. restart {restart}": "g-",
        "Conjugate Gradients PR": "c--",
        f"Conjugate Gradients PR. restart {restart}": "c-",
    }
    plt.figure(figsize=(10, 3.5))
    plt.suptitle(f"Regularized binary logistic regression. n={n}. m={m}. μ={mu}")
    plt.subplot(1, 2, 1)
    for method, result_ in results["methods"].items():
        plt.semilogy(result_[0], linestyles[method])
    plt.xlabel('Iteration')
    plt.ylabel(r'$f(x)$')
    plt.grid(linestyle=":")
    plt.subplot(1, 2, 2)
    for method, result_ in results["methods"].items():
        plt.semilogy(result_[1], linestyles[method], label=method)
    plt.ylabel(r'$\|\nabla f(x)\|_2$')
    plt.xlabel('Iteration')
    plt.grid(linestyle=":")
    # Place the legend below the plots and adjust the layout to make space for it
    if restart is None:
        plt.figlegend(loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.00))
        plt.tight_layout(rect=[0, 0.05, 1, 1])
    else:
        plt.figlegend(loc='lower center', ncol=3, bbox_to_anchor=(0.5, -0.02))
        plt.tight_layout(rect=[0, 0.1, 1, 1])
    # plt.savefig(f"cg_non_linear_{m}_{n}_{mu}_{restart}.pdf")
    plt.show()

How can such methods be applied to non-quadratic problems? For example, consider regularized binary logistic regression with labels $y_i \in \{-1, +1\}$:

$$
f(x) = \dfrac{\mu}{2}\|x\|_2^2 + \dfrac{1}{m}\sum_{i=1}^{m}\log\left(1 + \exp\left(-y_i \langle a_i, x \rangle\right)\right) \longrightarrow \min_{x \in \mathbb{R}^n}
$$
We can use the Fletcher-Reeves or the Polak-Ribière method. The Polak-Ribière iteration is implemented in the ConjugateGradientPR() function above; the only difference from Fletcher-Reeves is the choice of β, recalled below.
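For reference, both methods build the direction $d_{k+1} = -\nabla f(x_{k+1}) + \beta_k d_k$ and pick the step $\alpha_k$ by a line search; they differ only in $\beta_k$ (the standard textbook formulas):

$$
\beta_k^{FR} = \dfrac{\|\nabla f(x_{k+1})\|_2^2}{\|\nabla f(x_k)\|_2^2},
\qquad
\beta_k^{PR} = \dfrac{\langle \nabla f(x_{k+1}),\, \nabla f(x_{k+1}) - \nabla f(x_k) \rangle}{\|\nabla f(x_k)\|_2^2}
$$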
Run experiments for different $\mu$ and dimensions:
# Experiment parameters
params = {
"n": 300,
"m": 1000,
"mu": 0,
"alpha": 1e-1,
"iterations": 200,
"restart": None
}
results = run_experiment(params)
plot_results(results)

# Experiment parameters
params = {
"n": 300,
"m": 1000,
"mu": 1,
"alpha": 3e-2,
"iterations": 200,
"restart": None
}
results = run_experiment(params)
plot_results(results)

# Experiment parameters
params = {
"n": 300,
"m": 1000,
"mu": 1,
"alpha": 3e-2,
"iterations": 200,
"restart": 20
}
results = run_experiment(params)
plot_results(results)

# Experiment parameters
params = {
"n": 300,
"m": 1000,
"mu": 10,
"alpha": 1e-2,
"iterations": 200,
"restart": None
}
results = run_experiment(params)
plot_results(results)

# Experiment parameters
params = {
"n": 300,
"m": 1000,
"mu": 10,
"alpha": 1e-2,
"iterations": 200,
"restart": 20
}
results = run_experiment(params)
plot_results(results)