A toy mixed-norm regression problem
Let \(f_k(x)\) be the sum of the \(k\) largest entries of \(|x|\). It turns out that this function is a norm! Two special cases: \(f_1(x) = \|x\|_{\infty}\) and \(f_n(x) = \|x\|_1\).
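As a quick numerical sanity check of these two special cases (a small NumPy sketch, not part of the original exercise):

import numpy as np

def f(x, k):
    # Sum of the k largest entries of |x|.
    return np.sort(np.abs(x))[-k:].sum()

x = np.array([3.0, -7.0, 1.5, 4.0])
print(f(x, 1), np.linalg.norm(x, np.inf))  # both print 7.0
print(f(x, 4), np.linalg.norm(x, 1))       # both print 15.5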
Consider the minimum-norm regression problem:

\(\qquad \operatorname{minimize} ~f_k(x) + \gamma f_n(x) ~\text{ s.t. }~ A x = y,\)

where \(A\) is a wide \(m \times n\) matrix (\(m < n\)), \(y\) is an \(m\)-vector, and \(\gamma > 0\) is a tuning parameter. As \(\gamma\) gets smaller, the \(\ell_1\) term carries less weight, so the penalty concentrates on the \(k\) largest entries of \(|x|\): the larger values of \(x\) are “more penalized”.
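CVXPY's built-in sum_largest atom (used below for comparison) does not enumerate subsets; it admits a compact representation based on a well-known variational identity (stated here for context, not part of the original): for any vector \(z\), the sum of the \(k\) largest entries of \(z\) equals \(\min_t \left( kt + \sum_i \max(z_i - t, 0) \right)\), with the minimum attained at the \(k\)-th largest entry of \(z\). A quick numerical check of this identity:

import numpy as np

z = np.abs(np.array([3.0, -7.0, 1.5, 4.0, -2.0]))
k = 2
direct = np.sort(z)[-k:].sum()                  # 7 + 4 = 11
t = np.sort(z)[-k]                              # k-th largest entry of z
variational = k*t + np.maximum(z - t, 0).sum()  # matches the identity above
print(direct, variational)                      # both print 11.0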
import cvxpy as cp
import itertools

def sum_abs_largest(expr, k):
    # One possible implementation (hinted at by the itertools import):
    # f_k(x) = max over all size-k index sets S of sum_{i in S} |x_i|.
    n = expr.size
    sums = [cp.sum(cp.abs(expr)[list(idxs)])
            for idxs in itertools.combinations(range(n), k)]
    return cp.maximum(*sums)
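Note that this construction produces \(\binom{n}{k}\) scalar sums inside the maximum; for the instance below (\(n = 14\), \(k = 3\)) that is already \(\binom{14}{3} = 364\) terms, so expect the home-brewed formulation to canonicalize into a much larger problem than the built-in atom (compare the spy plots at the end).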
import numpy as np
import scipy.linalg as la
import scipy.sparse as sp
import time

def make_problem_data(n, m, true_k):
    np.random.seed(1)
    # Sparse reference signal: true_k entries drawn uniformly from [10, 110).
    x_ref = np.zeros(n)
    idxs = np.random.choice(np.arange(n), true_k, replace=False)
    x_ref[idxs] = 10 + 100*np.random.rand(true_k)
    # Wide Gaussian sensing matrix and a noise vector scaled to norm 1/m.
    A = np.random.randn(m, n)
    v = np.random.normal(0, 1.0, m)
    v /= (m*la.norm(v))
    y = A @ x_ref + v
    return A, x_ref, y
def make_and_solve(loss, x):
    # Note: this closes over the globals A and y defined in the next cell.
    prob = cp.Problem(cp.Minimize(loss), [A @ x == y])
    tic = time.time()
    prob.solve(solver='ECOS')
    toc = time.time()
    t = toc - tic
    x_est = x.value
    return prob, x_est, t
n, m, true_k = 14, 7, 4
A, x_ref, y = make_problem_data(n, m, true_k)

x = cp.Variable(n)
gamma = cp.Parameter(pos=True)
gamma.value = 1e-3
k = 3

# Home-brewed formulation of f_k, via subset enumeration.
loss1 = sum_abs_largest(x, k) + gamma * cp.norm(x, 1)
prob1, x1, t1 = make_and_solve(loss1, x)
print(f'Solve time (home-brewed)\n\t{t1}')

# Built-in CVXPY atom for the same function.
loss2 = cp.sum_largest(cp.abs(x), k) + gamma * cp.norm(x, 1)
prob2, x2, t2 = make_and_solve(loss2, x)
print(f'Solve time (cvxpy atom)\n\t{t2}')

disc = la.norm(x1 - x2) / min(la.norm(x1), la.norm(x2))
print(f'Discrepancy between two solutions\n\t{disc}')
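Because gamma is a cp.Parameter rather than a Python constant, its value can be changed and the same Problem object solved again without being rebuilt. A minimal sketch (the value 1e-1 is illustrative, not from the original):

gamma.value = 1e-1          # illustrative new value, not from the original
prob2.solve(solver='ECOS')  # re-solve the same Problem object
print(x.value)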
import matplotlib.pyplot as plt
%matplotlib inline

# Compare the sparsity patterns of the canonicalized constraint matrices
# that SCS would receive for each formulation.
A1 = prob1.get_problem_data(solver='SCS')[0]['A']
plt.spy(A1, aspect='auto')
plt.show()

A2 = prob2.get_problem_data(solver='SCS')[0]['A']
plt.spy(A2, aspect='auto')
plt.show()
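The spy plots visualize the sparsity patterns of the constraint matrices handed to SCS. Expect the home-brewed formulation's matrix to be far larger: its maximum over \(\binom{14}{3} = 364\) subset sums contributes one epigraph constraint per subset, whereas cp.sum_largest admits a compact canonicalization.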