1
2 """
The :mod:`parsimony.algorithms.coordinate` module includes several algorithms
that minimise an implicit or explicit loss function by utilising a coordinate
or block coordinate descent.
6
7 Algorithms may not store states. I.e., if they are classes, do not keep
8 references to objects with state in the algorithm objects. It should be
9 possible to copy and share algorithms between e.g. estimators, and thus they
10 should not depend on any state. If they do, make sure that the state is
11 completely reset when reset() is called.
12
13 Created on Fri Aug 29 13:25:07 2014
14
15 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.
16
17 @author: Tommy Löfstedt
18 @email: tommy@compsol.se
19 @license: BSD 3-clause.
20 """
21 import numpy as np
22
23 try:
24 from . import bases
25 except ValueError:
26 import parsimony.algorithms.bases as bases
27 import parsimony.utils as utils
28 import parsimony.utils.maths as maths
29 import parsimony.utils.consts as consts
30 import parsimony.utils.start_vectors as start_vectors
31 import parsimony.functions as functions
32 import parsimony.functions.penalties as penalties
33 from parsimony.algorithms.utils import Info
34
35 __all__ = ["LassoCoordinateDescent", "ShootingAlgorithm"]
36
37
38 -class LassoCoordinateDescent(bases.ImplicitAlgorithm,
39 bases.IterativeAlgorithm,
40 bases.InformationAlgorithm):
41 """A coordinate descent algorithm for the lasso.
42
43 This algorithm is very similar to the ShootingAlgorithm, but uses soft
44 thresholding instead of splitting the cases of the subgradient of L1.
45 It seems ShootingAlgorithm is slightly faster.
46
47 Parameters
48 ----------
49 l : Non-negative float. The Lagrange multiplier, or regularisation
50 constant, of the Lasso.
51
52 mean : Boolean. Whether to compute the squared loss or the mean squared
53 loss. Default is True, the mean squared loss.
54
55 penalty_start : Non-negative integer. The number of columns, variables
56 etc., to be exempt from penalisation. Equivalently, the first index
57 to be penalised. Default is 0, all columns are included.
58
59 eps : Positive float. Tolerance for the stopping criterion.
60
61 info : List or tuple of utils.Info. What, if any, extra run information
62 should be stored. Default is an empty list, which means that no
63 run information is computed nor returned.
64
65 max_iter : Non-negative integer. Maximum allowed number of iterations.
66 Default is 10000.
67
68 min_iter : Non-negative integer less than or equal to max_iter. Minimum
69 number of iterations that must be performed. Default is 1.
70
71 Examples
72 --------
73 >>> from parsimony.algorithms.coordinate import LassoCoordinateDescent
74 >>> import parsimony.functions as functions
75 >>> import parsimony.functions.penalties as penalties
76 >>> import numpy as np
77 >>> np.random.seed(42)
78 >>> X = np.random.rand(100, 50)
79 >>> beta_star = np.random.rand(50, 1)
80 >>> beta_star[beta_star < 0.5] = 0.0
81 >>> y = np.dot(X, beta_star) + 0.001 * np.random.randn(100, 1)
82 >>> l = 0.0618
83 >>> alg = LassoCoordinateDescent(l)
84 >>> function = functions.CombinedFunction()
85 >>> function.add_function(functions.losses.LinearRegression(X, y,
86 ... mean=False))
87 >>> function.add_prox(penalties.L1(l=l))
88 >>> beta = alg.run(X, y)
89 >>> round(np.linalg.norm(beta_star - beta), 14)
90 0.34655181469595
91 """
92 INFO_PROVIDED = [Info.ok,
93 Info.num_iter,
94 Info.time,
95 Info.fvalue,
96 Info.converged]
97
103
104 super(LassoCoordinateDescent, self).__init__(info=info,
105 max_iter=max_iter,
106 min_iter=min_iter)
107
108 self.l = max(0.0, float(l))
109 self.mean = bool(mean)
110 self.penalty_start = max(0, int(penalty_start))
111 self.start_vector = start_vector
112 self.eps = max(consts.TOLERANCE, float(eps))
113
114 - def _f(self, Xbeta_y, y, beta):
115
116 n = y.shape[0]
117
118 if self.mean:
119 d = 2.0 * n
120 else:
121 d = 2.0
122
123 f = (1.0 / d) * np.sum(Xbeta_y ** 2.0)
124
125 if self.penalty_start > 0:
126 beta_ = beta[self.penalty_start:, :]
127 else:
128 beta_ = beta
129
130 f += self.l * maths.norm1(beta_)
131
132 return f
133
134 @bases.force_reset
135 - def run(self, X, y, beta=None):
136 """Find the minimiser of the associated function, starting at beta.
137
138 Parameters
139 ----------
140 X : Numpy array, shape n-by-p. The matrix X with independent
141 variables.
142
143 y : Numpy array, shape n-by-1. The response variable y.
144
145 beta : Numpy array. Optional starting point.
146 """
147 if self.info_requested(Info.ok):
148 self.info_set(Info.ok, False)
149 if self.info_requested(Info.time):
150 t = []
151 if self.info_requested(Info.fvalue):
152 f = []
153 if self.info_requested(Info.converged):
154 self.info_set(Info.converged, False)
155
156 n, p = X.shape
157
158 if beta is None:
159 beta = self.start_vector.get_vector(p)
160 else:
161 beta = beta.copy()
162
163 function = functions.CombinedFunction()
164 function.add_function(functions.losses.LinearRegression(X, y,
165 mean=False))
166 function.add_prox(penalties.L1(l=self.l))
167
168 xTx = np.sum(X ** 2.0, axis=0)
169 if self.mean:
170 xTx *= 1.0 / float(n)
171
172 for i in xrange(1, self.max_iter + 1):
173
174 if self.info_requested(Info.time):
175 tm = utils.time_cpu()
176
177
178
179 y_Xbeta = y - np.dot(X, beta)
180
181 betaold = beta.copy()
182 for j in xrange(p):
183
184 xj = X[:, [j]]
185 betaj = beta[j, 0]
186
187 if xTx[j] < consts.TOLERANCE:
188 bj = 0.0
189 else:
190 bj = np.dot(xj.T, y_Xbeta + xj * betaj)[0, 0]
191 if self.mean:
192 bj /= float(n)
193
194 if j < self.penalty_start:
195 bj = bj / xTx[j]
196 else:
197
198 bj = np.sign(bj) \
199 * max(0.0, (abs(bj) - self.l) / xTx[j])
200
201 y_Xbeta -= xj * (bj - betaj)
202 beta[j] = bj
203
204 if self.info_requested(Info.time):
205 t.append(utils.time_cpu() - tm)
206 if self.info_requested(Info.fvalue):
207 f_ = self._f(y_Xbeta, y, beta)
208 f.append(f_)
209
210
211 if maths.norm(beta - betaold) < self.eps \
212 and i >= self.min_iter:
213
214 if self.info_requested(Info.converged):
215 self.info_set(Info.converged, True)
216
217
218 break
219
220 self.num_iter = i
221 if self.info_requested(Info.num_iter):
222 self.info_set(Info.num_iter, i)
223 if self.info_requested(Info.time):
224 self.info_set(Info.time, t)
225 if self.info_requested(Info.fvalue):
226 self.info_set(Info.fvalue, f)
227 if self.info_requested(Info.ok):
228 self.info_set(Info.ok, True)
229
230 return beta
231
232
233 -class ShootingAlgorithm(bases.ImplicitAlgorithm,
234 bases.IterativeAlgorithm,
235 bases.InformationAlgorithm):
236 """The shooting algorithm for the lasso.
237
238 Parameters
239 ----------
240 l : Non-negative float. The Lagrange multiplier, or regularisation
241 constant, of the Lasso.
242
243 mean : Boolean. Whether to compute the squared loss or the mean squared
244 loss. Default is True, the mean squared loss.
245
246 penalty_start : Non-negative integer. The number of columns, variables
247 etc., to be exempt from penalisation. Equivalently, the first index
248 to be penalised. Default is 0, all columns are included.
249
250 eps : Positive float. Tolerance for the stopping criterion.
251
252 info : List or tuple of utils.Info. What, if any, extra run information
253 should be stored. Default is an empty list, which means that no
254 run information is computed nor returned.
255
256 max_iter : Non-negative integer. Maximum allowed number of iterations.
257 Default is 10000.
258
259 min_iter : Non-negative integer less than or equal to max_iter. Minimum
260 number of iterations that must be performed. Default is 1.
261
262 Examples
263 --------
264 >>> from parsimony.algorithms.coordinate import ShootingAlgorithm
265 >>> import parsimony.functions as functions
266 >>> import parsimony.functions.penalties as penalties
267 >>> import numpy as np
268 >>> np.random.seed(42)
269 >>> X = np.random.rand(100, 50)
270 >>> beta_star = np.random.rand(50, 1)
271 >>> beta_star[beta_star < 0.5] = 0.0
272 >>> y = np.dot(X, beta_star) + 0.001 * np.random.randn(100, 1)
273 >>> l = 0.0618
274 >>> alg = ShootingAlgorithm(l)
275 >>> function = functions.CombinedFunction()
276 >>> function.add_function(functions.losses.LinearRegression(X, y,
277 ... mean=False))
278 >>> function.add_prox(penalties.L1(l=l))
279 >>> beta = alg.run(X, y)
280 >>> round(np.linalg.norm(beta_star - beta), 14)
281 0.34655181469595
282 """
283 INFO_PROVIDED = [Info.ok,
284 Info.num_iter,
285 Info.time,
286 Info.fvalue,
287 Info.converged]
288
294
295 super(ShootingAlgorithm, self).__init__(info=info,
296 max_iter=max_iter,
297 min_iter=min_iter)
298
299 self.l = max(0.0, float(l))
300 self.mean = bool(mean)
301 self.penalty_start = max(0, int(penalty_start))
302 self.start_vector = start_vector
303 self.eps = max(consts.TOLERANCE, float(eps))
304
305 - def _f(self, Xbeta_y, y, beta):
306
307 n = y.shape[0]
308
309 if self.mean:
310 d = 2.0 * n
311 else:
312 d = 2.0
313
314 f = (1.0 / d) * np.sum(Xbeta_y ** 2.0)
315
316 if self.penalty_start > 0:
317 beta_ = beta[self.penalty_start:, :]
318 else:
319 beta_ = beta
320
321 f += self.l * maths.norm1(beta_)
322
323 return f
324
    @bases.force_reset
    def run(self, X, y, beta=None):
        """Find the minimiser of the associated function, starting at beta.

        Parameters
        ----------
        X : Numpy array, shape n-by-p. The matrix X with independent
                variables.

        y : Numpy array, shape n-by-1. The response variable y.

        beta : Numpy array. Optional starting point.
        """
        # Initialise the requested run information.
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, False)
        if self.info_requested(Info.time):
            t = []
        if self.info_requested(Info.fvalue):
            f = []
        if self.info_requested(Info.converged):
            self.info_set(Info.converged, False)

        n, p = X.shape

        if beta is None:
            beta = self.start_vector.get_vector(p)
        else:
            beta = beta.copy()

        # Column-wise squared norms, x_j'x_j (scaled by 1/n for the mean
        # squared loss). Near-zero columns are handled explicitly below.
        xTx = np.sum(X ** 2.0, axis=0)
        if self.mean:
            xTx *= 1.0 / float(n)

        for i in xrange(1, self.max_iter + 1):

            if self.info_requested(Info.time):
                tm = utils.time_cpu()

            # Residual, X.beta - y; updated incrementally in the inner loop.
            Xbeta_y = np.dot(X, beta) - y

            betaold = beta.copy()
            for j in xrange(p):

                xj = X[:, [j]]
                betaj = beta[j]

                if xTx[j] < consts.TOLERANCE:  # Avoid division-by-zero.
                    bj = 0.0
                else:
                    # Correlation of column j with the partial residual in
                    # which coordinate j's contribution has been removed.
                    S0 = np.dot(xj.T, Xbeta_y - xj * betaj)[0, 0]
                    if self.mean:
                        S0 /= float(n)

                    if j < self.penalty_start:
                        # Unpenalised coordinate: plain least-squares update.
                        bj = -S0 / xTx[j]
                    else:
                        # The three cases of the L1 subgradient (the
                        # "shooting" update): shrink from either side, or
                        # set the coordinate exactly to zero.
                        if S0 > self.l:
                            bj = (self.l - S0) / xTx[j]
                        elif S0 < -self.l:
                            bj = (-self.l - S0) / xTx[j]
                        else:
                            bj = 0.0

                # Update the residual in place instead of recomputing the
                # full matrix-vector product X.beta.
                Xbeta_y += xj * (bj - betaj)
                beta[j] = bj

            if self.info_requested(Info.time):
                t.append(utils.time_cpu() - tm)
            if self.info_requested(Info.fvalue):
                f_ = self._f(Xbeta_y, y, beta)
                f.append(f_)

            # Converged when a full sweep moved beta by less than eps.
            if maths.norm(beta - betaold) < self.eps \
                    and i >= self.min_iter:

                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                break

        self.num_iter = i
        if self.info_requested(Info.num_iter):
            self.info_set(Info.num_iter, i)
        if self.info_requested(Info.time):
            self.info_set(Info.time, t)
        if self.info_requested(Info.fvalue):
            self.info_set(Info.fvalue, f)
        if self.info_requested(Info.ok):
            self.info_set(Info.ok, True)

        return beta
423
if __name__ == "__main__":
    # Run the doctests embedded in the class docstrings above.
    import doctest
    doctest.testmod()
427