1
2 """
3 Created on Tue Jul 16 12:32:00 2013
4
5 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.
6
7 @author: Tommy Löfstedt
8 @email: tommy.loefstedt@cea.fr
9 @license: BSD 3-clause.
10 """
11 import numpy as np
12 from grad import grad_l1
13 from grad import grad_l2_squared
14 from grad import grad_tv
15 from utils import bisection_method
16
17 __all__ = ['load']
18
19
20 -def load(l, k, g, beta, M, e, A, snr=None, intercept=False):
21 """Returns data generated such that we know the exact solution.
22
23 The data generated by this function is fit to the Linear regression + L1 +
24 L2 + Total variation function, i.e. to:
25
26 f(b) = (1 / 2).|Xb - y|² + l.|b|_1 + (k / 2).|b|² + g.TV(b),
27
28 where |.|_1 is the L1 norm, |.|² is the squared L2 norm and TV is the
29 total variation penalty.
30
31 Parameters
32 ----------
33 l : The L1 regularisation parameter.
34
35 k : The L2 regularisation parameter.
36
37 g : The total variation regularisation parameter.
38
39 beta : The regression vector to generate data from.
40
41 M : The matrix to use when building data. This matrix carries the desired
42 correlation structure of the generated data. The generated data
43 will be a column-scaled version of this matrix.
44
45 e : The error vector e = Xb - y. This vector carries the desired
46 distribution of the residual.
47
48 A : The linear operator for the Nesterov function.
49
50 snr : Signal-to-noise ratio between model and residual.
51
52 intercept : Boolean. Whether or not to include an intercept variable. This
53 variable is not penalised. Note that if intercept is True, then e
54 will be centred.
55
56 Returns
57 -------
58 X : The generated X matrix.
59
60 y : The generated y vector.
61
62 beta : The regression vector with the correct snr.
63 """
64 l = float(l)
65 k = float(k)
66 g = float(g)
67
68 if intercept:
69 e = e - np.mean(e)
70
71 if snr != None:
72 def f(x):
73 X, y = _generate(l, k, g, x * beta, M, e, A, intercept)
74
75
76
77
78
79
80 return (np.linalg.norm(np.dot(X, x * beta)) \
81 / np.linalg.norm(e)) - snr
82
83 snr = bisection_method(f, low=0.0, high=np.sqrt(snr), maxiter=30)
84
85 beta = beta * snr
86
87 X, y = _generate(l, k, g, beta, M, e, A, intercept)
88
89 return X, y, beta
90
91
92 -def _generate(l, k, g, beta, M, e, A, intercept):
93
94 p = beta.shape[0]
95
96 if intercept:
97 gradL1 = grad_l1(beta[1:, :])
98 gradL2 = grad_l2_squared(beta[1:, :])
99 gradTV = grad_tv(beta[1:, :], A)
100 else:
101 gradL1 = grad_l1(beta)
102 gradL2 = grad_l2_squared(beta)
103 gradTV = grad_tv(beta, A)
104
105 alpha = -(l * gradL1 + k * gradL2 + g * gradTV)
106 Mte = np.dot(M.T, e)
107 if intercept:
108 alpha = np.divide(alpha, Mte[1:, :])
109 else:
110 alpha = np.divide(alpha, Mte)
111
112 X = np.ones(M.shape)
113 if intercept:
114 for i in xrange(p - 1):
115 X[:, i + 1] = M[:, i + 1] * alpha[i, 0]
116 else:
117 for i in xrange(p):
118 X[:, i] = M[:, i] * alpha[i, 0]
119
120 y = np.dot(X, beta) - e
121
122 return X, y
123