1
2 """
The :mod:`parsimony.algorithms.coordinate` module includes several algorithms
that minimise an implicit or explicit loss function by utilising a coordinate
or block coordinate descent.
6
7 Algorithms may not store states. I.e., if they are classes, do not keep
8 references to objects with state in the algorithm objects. It should be
9 possible to copy and share algorithms between e.g. estimators, and thus they
10 should not depend on any state. If they do, make sure that the state is
11 completely reset when reset() is called.
12
13 Created on Fri Aug 29 13:25:07 2014
14
15 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.
16
17 @author: Tommy Löfstedt
18 @email: tommy@compsol.se
19 @license: BSD 3-clause.
20 """
21 import numpy as np
22
23 try:
24 from . import bases
25 except ValueError:
26 import parsimony.algorithms.bases as bases
27 import parsimony.utils as utils
28 import parsimony.utils.maths as maths
29 import parsimony.utils.consts as consts
30 import parsimony.utils.start_vectors as start_vectors
31 import parsimony.functions as functions
32 import parsimony.functions.penalties as penalties
33 from parsimony.algorithms.utils import Info
34
35 __all__ = ["LassoCoordinateDescent", "ShootingAlgorithm"]
36
37
38 -class LassoCoordinateDescent(bases.ImplicitAlgorithm,
39 bases.IterativeAlgorithm,
40 bases.InformationAlgorithm):
41 """A coordinate descent algorithm for the lasso.
42
43 This algorithm is very similar to the ShootingAlgorithm, but uses soft
44 thresholding instead of splitting the cases of the subgradient of L1.
45 It seems ShootingAlgorithm is slightly faster.
46
47 Parameters
48 ----------
49 l : Non-negative float. The Lagrange multiplier, or regularisation
50 constant, of the Lasso.
51
52 mean : Boolean. Whether to compute the squared loss or the mean squared
53 loss. Default is True, the mean squared loss.
54
55 penalty_start : Non-negative integer. The number of columns, variables
56 etc., to be exempt from penalisation. Equivalently, the first index
57 to be penalised. Default is 0, all columns are included.
58
59 eps : Positive float. Tolerance for the stopping criterion.
60
61 info : List or tuple of utils.Info. What, if any, extra run information
62 should be stored. Default is an empty list, which means that no
63 run information is computed nor returned.
64
65 max_iter : Non-negative integer. Maximum allowed number of iterations.
66 Default is 10000.
67
68 min_iter : Non-negative integer less than or equal to max_iter. Minimum
69 number of iterations that must be performed. Default is 1.
70
71 Examples
72 --------
73 >>> from parsimony.algorithms.coordinate import LassoCoordinateDescent
74 >>> import parsimony.functions as functions
75 >>> import parsimony.functions.penalties as penalties
76 >>> import numpy as np
77 >>> np.random.seed(42)
78 >>> X = np.random.rand(100, 50)
79 >>> beta_star = np.random.rand(50, 1)
80 >>> beta_star[beta_star < 0.5] = 0.0
81 >>> y = np.dot(X, beta_star) + 0.001 * np.random.randn(100, 1)
82 >>> l = 0.0618
83 >>> alg = LassoCoordinateDescent(l)
84 >>> function = functions.CombinedFunction()
85 >>> function.add_function(functions.losses.LinearRegression(X, y,
86 ... mean=False))
87 >>> function.add_prox(penalties.L1(l=l))
88 >>> beta = alg.run(X, y)
89 >>> round(np.linalg.norm(beta_star - beta), 14)
90 0.34655181469595
91 """
92 INFO_PROVIDED = [Info.ok,
93 Info.num_iter,
94 Info.time,
95 Info.fvalue,
96 Info.converged]
97
103
104 super(LassoCoordinateDescent, self).__init__(info=info,
105 max_iter=max_iter,
106 min_iter=min_iter)
107
108 self.l = max(0.0, float(l))
109 self.mean = bool(mean)
110 self.penalty_start = max(0, int(penalty_start))
111 self.start_vector = start_vector
112 self.eps = max(consts.TOLERANCE, float(eps))
113
114 - def _f(self, Xbeta_y, y, beta):
115
116 n = y.shape[0]
117
118 if self.mean:
119 d = 2.0 * n
120 else:
121 d = 2.0
122
123 f = (1.0 / d) * np.sum(Xbeta_y ** 2.0)
124
125 if self.penalty_start > 0:
126 beta_ = beta[self.penalty_start:, :]
127 else:
128 beta_ = beta
129
130 f += self.l * maths.norm1(beta_)
131
132 return f
133
134 @bases.force_reset
135 - def run(self, X, y, beta=None):
136 """Find the minimiser of the associated function, starting at beta.
137
138 Parameters
139 ----------
140 X : Numpy array, shape n-by-p. The matrix X with independent
141 variables.
142
143 y : Numpy array, shape n-by-1. The response variable y.
144
145 beta : Numpy array. Optional starting point.
146 """
147 if self.info_requested(Info.ok):
148 self.info_set(Info.ok, False)
149 if self.info_requested(Info.time):
150 t = []
151 if self.info_requested(Info.fvalue):
152 f = []
153 if self.info_requested(Info.converged):
154 self.info_set(Info.converged, False)
155
156 n, p = X.shape
157
158 if beta is None:
159 beta = self.start_vector.get_vector(p)
160 else:
161 beta = beta.copy()
162
163 function = functions.CombinedFunction()
164 function.add_function(functions.losses.LinearRegression(X, y,
165 mean=False))
166 function.add_prox(penalties.L1(l=self.l))
167
168 xTx = np.sum(X ** 2.0, axis=0)
169 if self.mean:
170 xTx *= 1.0 / float(n)
171
172 for i in xrange(1, self.max_iter + 1):
173
174 if self.info_requested(Info.time):
175 tm = utils.time_cpu()
176
177
178
179 y_Xbeta = y - np.dot(X, beta)
180
181 betaold = beta.copy()
182 for j in xrange(p):
183
184 xj = X[:, [j]]
185 betaj = beta[j, 0]
186
187 if xTx[j] < consts.TOLERANCE:
188 bj = 0.0
189 else:
190 bj = np.dot(xj.T, y_Xbeta + xj * betaj)[0, 0]
191 if self.mean:
192 bj /= float(n)
193
194 if j < self.penalty_start:
195 bj = bj / xTx[j]
196 else:
197
198 bj = np.sign(bj) \
199 * max(0.0, (abs(bj) - self.l) / xTx[j])
200
201 y_Xbeta -= xj * (bj - betaj)
202 beta[j] = bj
203
204 if self.info_requested(Info.time):
205 t.append(utils.time_cpu() - tm)
206 if self.info_requested(Info.fvalue):
207 f_ = self._f(y_Xbeta, y, beta)
208 f.append(f_)
209
210
211 if maths.norm(beta - betaold) < self.eps \
212 and i >= self.min_iter:
213
214 if self.info_requested(Info.converged):
215 self.info_set(Info.converged, True)
216
217
218 break
219
220 self.num_iter = i
221 if self.info_requested(Info.num_iter):
222 self.info_set(Info.num_iter, i)
223 if self.info_requested(Info.time):
224 self.info_set(Info.time, t)
225 if self.info_requested(Info.fvalue):
226 self.info_set(Info.fvalue, f)
227 if self.info_requested(Info.ok):
228 self.info_set(Info.ok, True)
229
230 return beta
231
232
233 -class ShootingAlgorithm(bases.ImplicitAlgorithm,
234 bases.IterativeAlgorithm,
235 bases.InformationAlgorithm):
236 """The shooting algorithm for the lasso.
237
238 Parameters
239 ----------
240 l : Non-negative float. The Lagrange multiplier, or regularisation
241 constant, of the Lasso.
242
243 mean : Boolean. Whether to compute the squared loss or the mean squared
244 loss. Default is True, the mean squared loss.
245
246 penalty_start : Non-negative integer. The number of columns, variables
247 etc., to be exempt from penalisation. Equivalently, the first index
248 to be penalised. Default is 0, all columns are included.
249
250 eps : Positive float. Tolerance for the stopping criterion.
251
252 info : List or tuple of utils.Info. What, if any, extra run information
253 should be stored. Default is an empty list, which means that no
254 run information is computed nor returned.
255
256 max_iter : Non-negative integer. Maximum allowed number of iterations.
257 Default is 10000.
258
259 min_iter : Non-negative integer less than or equal to max_iter. Minimum
260 number of iterations that must be performed. Default is 1.
261
262 Examples
263 --------
264 >>> from parsimony.algorithms.coordinate import ShootingAlgorithm
265 >>> import parsimony.functions as functions
266 >>> import parsimony.functions.penalties as penalties
267 >>> import numpy as np
268 >>> np.random.seed(42)
269 >>> X = np.random.rand(100, 50)
270 >>> beta_star = np.random.rand(50, 1)
271 >>> beta_star[beta_star < 0.5] = 0.0
272 >>> y = np.dot(X, beta_star) + 0.001 * np.random.randn(100, 1)
273 >>> l = 0.0618
274 >>> alg = ShootingAlgorithm(l)
275 >>> function = functions.CombinedFunction()
276 >>> function.add_function(functions.losses.LinearRegression(X, y,
277 ... mean=False))
278 >>> function.add_prox(penalties.L1(l=l))
279 >>> beta = alg.run(X, y)
280 >>> round(np.linalg.norm(beta_star - beta), 14)
281 0.34655181469595
282 """
283 INFO_PROVIDED = [Info.ok,
284 Info.num_iter,
285 Info.time,
286 Info.fvalue,
287 Info.converged]
288
294
295 super(ShootingAlgorithm, self).__init__(info=info,
296 max_iter=max_iter,
297 min_iter=min_iter)
298
299 self.l = max(0.0, float(l))
300 self.mean = bool(mean)
301 self.penalty_start = max(0, int(penalty_start))
302 self.start_vector = start_vector
303 self.eps = max(consts.TOLERANCE, float(eps))
304
305 - def _f(self, Xbeta_y, y, beta):
306
307 n = y.shape[0]
308
309 if self.mean:
310 d = 2.0 * n
311 else:
312 d = 2.0
313
314 f = (1.0 / d) * np.sum(Xbeta_y ** 2.0)
315
316 if self.penalty_start > 0:
317 beta_ = beta[self.penalty_start:, :]
318 else:
319 beta_ = beta
320
321 f += self.l * maths.norm1(beta_)
322
323 return f
324
    @bases.force_reset
    def run(self, X, y, beta=None):
        """Find the minimiser of the associated function, starting at beta.

        Parameters
        ----------
        X : Numpy array, shape n-by-p. The matrix X with independent
                variables.

        y : Numpy array, shape n-by-1. The response variable y.

        beta : Numpy array. Optional starting point.
        """
        # Initialise the requested run information.
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)
        if self.info_requested(Info.time):
            t = []
        if self.info_requested(Info.fvalue):
            f = []
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        n, p = X.shape

        if beta is None:
            beta = self.start_vector.get_vector(p)
        else:
            beta = beta.copy()

        # Column-wise squared norms, x_j'x_j (scaled by 1/n for the mean
        # squared loss). Near-zero columns are handled explicitly below.
        xTx = np.sum(X ** 2.0, axis=0)
        if self.mean:
            xTx *= 1.0 / float(n)

        for i in xrange(1, self.max_iter + 1):

            if self.info_requested(Info.time):
                tm = utils.time_cpu()

            # Residual, X.beta - y; updated incrementally in the inner loop.
            Xbeta_y = np.dot(X, beta) - y

            betaold = beta.copy()
            for j in xrange(p):

                xj = X[:, [j]]
                betaj = beta[j]

                if xTx[j] < consts.TOLERANCE:  # Avoid division-by-zero.
                    bj = 0.0
                else:
                    # Correlation of column j with the partial residual in
                    # which coordinate j's contribution has been removed.
                    S0 = np.dot(xj.T, Xbeta_y - xj * betaj)[0, 0]
                    if self.mean:
                        S0 /= float(n)

                    if j < self.penalty_start:
                        # Unpenalised coordinate: plain least-squares update.
                        bj = -S0 / xTx[j]
                    else:
                        # The three cases of the L1 subgradient (the
                        # "shooting" update): shrink from either side, or
                        # set the coordinate exactly to zero.
                        if S0 > self.l:
                            bj = (self.l - S0) / xTx[j]
                        elif S0 < -self.l:
                            bj = (-self.l - S0) / xTx[j]
                        else:
                            bj = 0.0

                # Update the residual in place instead of recomputing the
                # full matrix-vector product X.beta.
                Xbeta_y += xj * (bj - betaj)
                beta[j] = bj

            if self.info_requested(Info.time):
                t.append(utils.time_cpu() - tm)
            if self.info_requested(Info.fvalue):
                f_ = self._f(Xbeta_y, y, beta)
                f.append(f_)

            # Converged when a full sweep moved beta by less than eps.
            if maths.norm(beta - betaold) < self.eps \
                    and i >= self.min_iter:

                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                break

        self.num_iter = i
        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, i)
        if self.info_requested(Info.time):
            self.info_set(Info.time, t)
        if self.info_requested(Info.fvalue):
            self.info_set(Info.fvalue, f)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        return beta
423
if __name__ == "__main__":
    # Run the doctests embedded in the class docstrings above.
    import doctest
    doctest.testmod()
427