1
2 """
3 Created on Mon May 12 15:46:47 2014
4
5 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.
6
7 @author: Tommy Löfstedt
8 @email: tommy.loefstedt@cea.fr
9 @license: BSD 3-clause.
10 """
11 import abc
12
13 import numpy as np
14
15 import utils
16
17 __all__ = ["SimulatedData", "LinearRegressionData"]
21
22 __metaclass__ = abc.ABCMeta
23
24 @abc.abstractmethod
25 - def load(self, beta):
26 raise NotImplementedError('Abstract method "load" must be '
27 'specialised!')
28
31 """Returns data for linear regression that is generated such that we know
32 the exact solution.
33
34 The data generated by this function is fit to the Linear regression + L1 +
35 L2 + Smoothed total variation function, i.e.:
36
37 f(b) = (1 / 2).|Xb - y|²_2 + sum_{i=1}^N l_i.P_i(b),
38
39 where the P_i(.) are penalty functions.
40
41 Parameters
42 ----------
43 penalties : List or tuple. The penalties to add to the linear regression
44 function.
45
46 X0 : Numpy array (n-by-p). The initial matrix to use when building data.
47 This matrix carries the desired correlation structure of the
48 generated data. The generated data will be a column-scaled version
49 of this matrix.
50
51 e : Numpy array (n-by-1). The error vector e = Xb - y. This vector carries
52 the desired distribution of the residual.
53
54 snr : Positive float. Signal-to-noise ratio between model and residual.
55
56 intercept : Boolean. Whether or not to include an intercept variable. This
57 variable is not penalised. Note that if intercept is True, then e
58 will be centred.
59
60 Returns
61 -------
62 X : Numpy array (n-by-p). The generated X matrix.
63
64 y : Numpy array (n-by-1). The generated y vector.
65
66 beta : Numpy array (p-by-1). The regression vector with the correct snr (if
67 snr is given).
68 """
69 - def __init__(self, penalties, X0, e, snr=None, intercept=False):
70
71 super(LinearRegressionData, self).__init__()
72
73 self.penalties = list(penalties)
74 self.X0 = X0
75 self.intercept = bool(intercept)
76 self.e = e - np.mean(e) if intercept else e
77 self.snr = float(snr) if snr is not None else None
78
79 - def load(self, beta):
80 """Generate the simulated data.
81
82 Parameters
83 ----------
84 beta : Numpy array (p-by-1). The regression vector to generate data
85 from.
86 """
87 if self.snr != None:
88 old_snr = self.snr
89 self.snr = None
90 try:
91 def f(x):
92 X, y, _ = self.load(x * beta)
93
94 print "snr = %.5f = %.5f = |X.b| / |e| = %.5f / %.5f" \
95 % (old_snr, np.linalg.norm(np.dot(X, x * beta)) \
96 / np.linalg.norm(self.e),
97 np.linalg.norm(np.dot(X, x * beta)),
98 np.linalg.norm(self.e))
99
100 return (np.linalg.norm(np.dot(X, x * beta)) \
101 / np.linalg.norm(self.e)) - old_snr
102
103 snr = utils.bisection_method(f,
104 low=0.0,
105 high=np.sqrt(old_snr),
106 maxiter=30)
107
108 finally:
109 self.snr = old_snr
110
111 beta = beta * snr
112
113 grad = 0.0
114 for p in self.penalties:
115 grad -= p.grad(beta[1:, :] if self.intercept else beta)
116
117 Mte = np.dot(self.X0.T, self.e)
118 if self.intercept:
119 Mte = Mte[1:, :]
120
121 alpha = np.divide(grad, Mte)
122
123 p = beta.shape[0]
124
125 start = 1 if self.intercept else 0
126 X = np.ones(self.X0.shape)
127 for i in xrange(start, p):
128 X[:, i] = self.X0[:, i] * alpha[i - start, 0]
129
130 y = np.dot(X, beta) - self.e
131
132 return X, y, beta
133
134 if __name__ == "__main__":
135 import doctest
136 doctest.testmod()
137