1
2 """
3 Created on Tue Jan 21 21:53:33 2014
4
5 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.
6
7 @author: Tommy Löfstedt
8 @email: tommy.loefstedt@cea.fr
9 @license: BSD 3-clause.
10 """
11 import numpy as np
12 from grad import grad_l1
13 from grad import grad_l2_squared
14 from grad import grad_glmu
15 from utils import bisection_method
16
17 __all__ = ['load']
18
19
20 -def load(l, k, g, beta, M, e, A, mu, snr=None, intercept=False):
21 """Returns data generated such that we know the exact solution.
22
23 The data generated by this function is fit to the Linear regression + L1 +
24 L2 + Smoothed group lasso function, i.e.:
25
26 f(b) = (1 / 2).|Xb - y|² + l.|b|_1 + (k / 2).|b|² + g.GLmu(b),
27
28 where |.|_1 is the L1 norm, |.|² is the squared L2 norm and GLmu is the
29 smoothed group lasso penalty.
30
31 Parameters
32 ----------
33 l : Non-negative float. The L1 regularisation parameter.
34
35 k : Non-negative float. The L2 regularisation parameter.
36
37 g : Non-negative float. The group lasso regularisation parameter.
38
39 beta : Numpy array (p-by-1). The regression vector to generate data from.
40
41 M : Numpy array (n-by-p). The matrix to use when building data. This
42 matrix carries the desired correlation structure of the generated
43 data. The generated data will be a column-scaled version of this
44 matrix.
45
46 e : Numpy array (n-by-1). The error vector e = Xb - y. This vector carries
47 the desired distribution of the residual.
48
49 A : Numpy or (usually) scipy.sparse array (K-by-p). The linear operator
50 for the Nesterov function.
51
52 mu : Non-negative float. The Nesterov smoothing regularisation parameter.
53
54 snr : Positive float. Signal-to-noise ratio between model and residual.
55
56 intercept : Boolean. Whether or not to include an intercept variable. This
57 variable is not penalised. Note that if intercept is True, then e
58 will be centred.
59
60 Returns
61 -------
62 X : Numpy array (n-by-p). The generated X matrix.
63
64 y : Numpy array (n-by-1). The generated y vector.
65
66 beta : Numpy array (p-by-1). The regression vector with the correct snr.
67 """
68 l = float(l)
69 k = float(k)
70 g = float(g)
71
72 if intercept:
73 e = e - np.mean(e)
74
75 if snr != None:
76 def f(x):
77 X, y = _generate(l, k, g, x * beta, M, e, A, mu, intercept)
78
79
80
81
82
83
84 return (np.linalg.norm(np.dot(X, x * beta)) / np.linalg.norm(e)) \
85 - snr
86
87 snr = bisection_method(f, low=0.0, high=np.sqrt(snr), maxiter=30)
88
89 beta = beta * snr
90
91 X, y = _generate(l, k, g, beta, M, e, A, mu, intercept)
92
93 return X, y, beta
94
95
96 -def _generate(l, k, g, beta, M, e, A, mu, intercept):
97
98 p = beta.shape[0]
99
100 if intercept:
101 gradL1 = grad_l1(beta[1:, :])
102 gradL2 = grad_l2_squared(beta[1:, :])
103 gradGLmu = grad_glmu(beta[1:, :], A, mu)
104 else:
105 gradL1 = grad_l1(beta)
106 gradL2 = grad_l2_squared(beta)
107 gradGLmu = grad_glmu(beta, A, mu)
108
109 alpha = -(l * gradL1 + k * gradL2 + g * gradGLmu)
110 Mte = np.dot(M.T, e)
111 if intercept:
112 alpha = np.divide(alpha, Mte[1:, :])
113 else:
114 alpha = np.divide(alpha, Mte)
115
116 X = np.ones(M.shape)
117 if intercept:
118 for i in xrange(p - 1):
119 X[:, i + 1] = M[:, i + 1] * alpha[i, 0]
120 else:
121 for i in xrange(p):
122 X[:, i] = M[:, i] * alpha[i, 0]
123
124 y = np.dot(X, beta) - e
125
126 return X, y
127