Source Code for Module parsimony.functions.losses

# -*- coding: utf-8 -*-
"""
The :mod:`parsimony.functions.losses` module contains the loss functions used
throughout the package. These represent mathematical functions and should thus
have the properties used by the corresponding algorithms. These properties are
defined in :mod:`parsimony.functions.properties`.

Loss functions should be stateless. Loss functions may be shared and copied
and should therefore not hold anything that cannot be recomputed the next time
they are called.

Created on Mon Apr 22 10:54:29 2013

Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.

@author:  Tommy Löfstedt, Vincent Guillemot, Edouard Duchesnay and
          Fouad Hadj-Selem
@email:   lofstedt.tommy@gmail.com, edouard.duchesnay@cea.fr
@license: BSD 3-clause.
"""
import numpy as np

try:
    from . import properties  # Only works when imported as a package.
except ValueError:
    import parsimony.functions.properties as properties  # Run as a script.
import parsimony.utils as utils
import parsimony.utils.consts as consts

__all__ = ["LinearRegression", "RidgeRegression",
           "LogisticRegression", "RidgeLogisticRegression",
           "LatentVariableVariance", "LinearFunction"]
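

# Illustrative sketch (not part of the original module): every loss defined
# below exposes the same small interface (f, grad, L, step and reset). The
# reset method only drops cached quantities, so anything it clears, e.g. the
# Lipschitz constant, is simply recomputed on the next call, in line with the
# statelessness requirement in the module docstring. All names in this sketch
# are local to the example.
def _interface_sketch():
    """Check that reset() only clears values that can be recomputed."""
    np.random.seed(42)
    X = np.random.rand(20, 5)
    y = np.random.rand(20, 1)
    lr = LinearRegression(X=X, y=y, mean=True)
    L_before = lr.L()   # Computed and cached.
    lr.reset()          # Cache dropped ...
    L_after = lr.L()    # ... and deterministically recomputed.
    return np.isclose(L_before, L_after)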


class LinearRegression(properties.CompositeFunction,
                       properties.Gradient,
                       properties.LipschitzContinuousGradient,
                       properties.StepSize):
    """The Linear regression loss function.
    """
    def __init__(self, X, y, mean=True):
        """
        Parameters
        ----------
        X : Numpy array (n-by-p). The regressor matrix.

        y : Numpy array (n-by-1). The regressand vector.

        mean : Boolean. Whether to compute the squared loss or the mean
                squared loss. Default is True, the mean squared loss.
        """
        self.X = X
        self.y = y

        self.mean = bool(mean)

        self.reset()

    def reset(self):
        """Free any cached computations from previous use of this Function.

        From the interface "Function".
        """
        self._L = None

    def f(self, beta):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        beta : Numpy array. Regression coefficient vector. The point at which
                to evaluate the function.
        """
        if self.mean:
            d = 2.0 * float(self.X.shape[0])
        else:
            d = 2.0

        f = (1.0 / d) * np.sum((np.dot(self.X, beta) - self.y) ** 2.0)

        return f

    def grad(self, beta):
        """Gradient of the function at beta.

        From the interface "Gradient".

        Parameters
        ----------
        beta : Numpy array. The point at which to evaluate the gradient.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.losses import LinearRegression
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(100, 150)
        >>> y = np.random.rand(100, 1)
        >>> lr = LinearRegression(X=X, y=y)
        >>> beta = np.random.rand(150, 1)
        >>> round(np.linalg.norm(lr.grad(beta)
        ...       - lr.approx_grad(beta, eps=1e-4)), 9)
        1.3e-08
        """
        grad = np.dot(self.X.T, np.dot(self.X, beta) - self.y)

        if self.mean:
            grad *= 1.0 / float(self.X.shape[0])

        return grad

    def L(self):
        """Lipschitz constant of the gradient.

        From the interface "LipschitzContinuousGradient".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.losses import LinearRegression
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(10, 15)
        >>> y = np.random.rand(10, 1)
        >>> lr = LinearRegression(X=X, y=y)
        >>> L = lr.L()
        >>> L_ = lr.approx_L((15, 1), 10000)
        >>> L >= L_
        True
        >>> round((L - L_) / L, 14)
        0.14039091870818
        """
        if self._L is None:

            from parsimony.algorithms.nipals import FastSVD

            # Rough limits for when FastSVD is faster than np.linalg.svd.
            n, p = self.X.shape
            if (max(n, p) > 500 and max(n, p) <= 1000
                    and float(max(n, p)) / min(n, p) <= 1.3) \
                    or (max(n, p) > 1000 and max(n, p) <= 5000
                        and float(max(n, p)) / min(n, p) <= 5.0) \
                    or (max(n, p) > 5000 and max(n, p) <= 10000
                        and float(max(n, p)) / min(n, p) <= 15.0) \
                    or (max(n, p) > 10000 and max(n, p) <= 20000
                        and float(max(n, p)) / min(n, p) <= 200.0) \
                    or max(n, p) > 10000:

                v = FastSVD().run(self.X, max_iter=1000)
                us = np.dot(self.X, v)
                self._L = np.sum(us ** 2.0)

            else:
                s = np.linalg.svd(self.X,
                                  full_matrices=False, compute_uv=False)
                self._L = np.max(s) ** 2.0

            if self.mean:
                self._L /= float(n)

        return self._L

    def step(self, beta, index=0):
        """The step size to use in descent methods.

        Parameters
        ----------
        beta : Numpy array. The point at which to determine the step size.
        """
        return 1.0 / self.L()
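

# Illustrative sketch (not part of the original module): how LinearRegression's
# f, grad and step methods fit together in a plain gradient-descent loop. All
# names below are local to the example.
def _linear_regression_descent_sketch():
    """Run a few fixed-step gradient-descent iterations on a toy problem."""
    np.random.seed(42)
    X = np.random.rand(50, 10)
    y = np.random.rand(50, 1)
    lr = LinearRegression(X=X, y=y, mean=True)
    beta = np.zeros((10, 1))
    for _ in range(100):
        # step() returns 1 / L(), the classical step size for a gradient
        # with Lipschitz constant L.
        beta = beta - lr.step(beta) * lr.grad(beta)
    return lr.f(beta)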


class RidgeRegression(properties.CompositeFunction,
                      properties.Gradient,
                      properties.LipschitzContinuousGradient,
                      properties.StronglyConvex,
                      properties.StepSize):
    """The Ridge Regression function, i.e. a representation of

        f(b) = (0.5 / n) * ||Xb - y||²_2 + (k / 2) * ||b||²_2,

    where ||.||²_2 is the squared L2-norm and k is the ridge parameter. The
    factor 1 / n is used when mean=True and dropped when mean=False.
    """
    # TODO: Inherit from LinearRegression and add an L2 constraint instead!
    def __init__(self, X, y, k, penalty_start=0, mean=True):
        """
        Parameters
        ----------
        X : Numpy array (n-by-p). The regressor matrix.

        y : Numpy array (n-by-1). The regressand vector.

        k : Non-negative float. The ridge parameter.

        penalty_start : Non-negative integer. The number of columns, variables
                etc., to exempt from penalisation. Equivalently, the first
                index to be penalised. Default is 0, all columns are included.

        mean : Boolean. Whether to compute the squared loss or the mean
                squared loss. Default is True, the mean squared loss.
        """
        self.X = X
        self.y = y
        self.k = float(k)

        self.penalty_start = int(penalty_start)
        self.mean = bool(mean)

        self.reset()

    def reset(self):
        """Free any cached computations from previous use of this Function.

        From the interface "Function".
        """
        self._lambda_max = None
        self._lambda_min = None

    def f(self, beta):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        beta : Numpy array. Regression coefficient vector. The point at which
                to evaluate the function.
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        if self.mean:
            d = 2.0 * float(self.X.shape[0])
        else:
            d = 2.0

        f = (1.0 / d) * np.sum((np.dot(self.X, beta) - self.y) ** 2.0) \
            + (self.k / 2.0) * np.sum(beta_ ** 2.0)

        return f

    def grad(self, beta):
        """Gradient of the function at beta.

        From the interface "Gradient".

        Parameters
        ----------
        beta : Numpy array. The point at which to evaluate the gradient.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.losses import RidgeRegression
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(100, 150)
        >>> y = np.random.rand(100, 1)
        >>> rr = RidgeRegression(X=X, y=y, k=3.14159265)
        >>> beta = np.random.rand(150, 1)
        >>> round(np.linalg.norm(rr.grad(beta)
        ...       - rr.approx_grad(beta, eps=1e-4)), 9)
        1.3e-08
        """
        gradOLS = np.dot((np.dot(self.X, beta) - self.y).T, self.X).T

        if self.mean:
            gradOLS *= 1.0 / float(self.X.shape[0])

        if self.penalty_start > 0:
            gradL2 = np.vstack((np.zeros((self.penalty_start, 1)),
                                self.k * beta[self.penalty_start:, :]))
        else:
            gradL2 = self.k * beta

        grad = gradOLS + gradL2

        return grad

    def L(self):
        """Lipschitz constant of the gradient.

        From the interface "LipschitzContinuousGradient".
        """
        if self._lambda_max is None:
            s = np.linalg.svd(self.X, full_matrices=False, compute_uv=False)

            self._lambda_max = np.max(s) ** 2.0

            if len(s) < self.X.shape[1]:
                self._lambda_min = 0.0
            else:
                self._lambda_min = np.min(s) ** 2.0

            if self.mean:
                self._lambda_max /= float(self.X.shape[0])
                self._lambda_min /= float(self.X.shape[0])

        return self._lambda_max + self.k

    @utils.deprecated("StronglyConvex.parameter")
    def lambda_min(self):
        """Smallest eigenvalue of the corresponding covariance matrix.

        From the interface "Eigenvalues".
        """
        return self.parameter()

    def parameter(self):
        """Returns the strongly convex parameter for the function.

        From the interface "StronglyConvex".
        """
        if self._lambda_min is None:
            self._lambda_max = None
            self.L()  # Precompute.

        return self._lambda_min + self.k

    def step(self, beta, index=0):
        """The step size to use in descent methods.

        Parameters
        ----------
        beta : Numpy array. The point at which to determine the step size.
        """
        return 1.0 / self.L()
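

# Illustrative sketch (not part of the original module): with penalty_start=0,
# RidgeRegression.f is the LinearRegression loss plus the squared L2 penalty,
# which the check below verifies numerically on random data.
def _ridge_regression_decomposition_sketch():
    """Check f_ridge(beta) == f_ols(beta) + (k / 2) * ||beta||²_2."""
    np.random.seed(42)
    X = np.random.rand(30, 8)
    y = np.random.rand(30, 1)
    beta = np.random.rand(8, 1)
    k = 0.5
    rr = RidgeRegression(X=X, y=y, k=k, mean=True)
    lr = LinearRegression(X=X, y=y, mean=True)
    penalty = (k / 2.0) * np.sum(beta ** 2.0)
    return abs(rr.f(beta) - (lr.f(beta) + penalty)) < 5e-12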


class LogisticRegression(properties.AtomicFunction,
                         properties.Gradient,
                         properties.LipschitzContinuousGradient,
                         properties.StepSize):
    """The Logistic Regression loss function.

    (Re-weighted) negative log-likelihood (cross-entropy):

      * f(beta) = -Sum wi (yi log(pi) + (1 - yi) log(1 - pi))
                = -Sum wi (yi xi' beta - log(1 + exp(xi' beta))),

      * grad f(beta) = -Sum wi xi (yi - pi),

    where pi = p(y=1 | xi, beta) = 1 / (1 + exp(-xi' beta)) and wi is the
    weight of sample i.

    See [Hastie 2009, pp. 102, 119 and 161; Bishop 2006, p. 206] for details.

    Parameters
    ----------
    X : Numpy array (n-by-p). The regressor matrix.

    y : Numpy array (n-by-1). The regressand vector.

    weights : Numpy array (n-by-1). The sample weights.

    mean : Boolean. Whether to compute the loss or the mean loss. Default is
            True, the mean loss.
    """
    def __init__(self, X, y, weights=None, mean=True):
        self.X = X
        self.y = y
        if weights is None:
            # TODO: Make the weights sparse.
            # weights = np.eye(self.X.shape[0])
            weights = np.ones(y.shape).reshape(y.shape)
        # TODO: Allow the weight vector to be a list.
        self.weights = weights
        self.mean = bool(mean)

        self.reset()

    def reset(self):
        """Free any cached computations from previous use of this Function.

        From the interface "Function".
        """
        self._L = None

    def f(self, beta):
        """Function value at the point beta.

        From the interface "Function".

        Parameters
        ----------
        beta : Numpy array. Regression coefficient vector. The point at which
                to evaluate the function.
        """
        Xbeta = np.dot(self.X, beta)
        negloglike = -np.sum(self.weights *
                             ((self.y * Xbeta) - np.log(1 + np.exp(Xbeta))))

        if self.mean:
            negloglike /= float(self.X.shape[0])

        return negloglike

    def grad(self, beta):
        """Gradient of the function at beta.

        From the interface "Gradient".

        Parameters
        ----------
        beta : Numpy array. The point at which to evaluate the gradient.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.losses import LogisticRegression
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(100, 150)
        >>> y = np.random.randint(0, 2, (100, 1))
        >>> lr = LogisticRegression(X=X, y=y, mean=True)
        >>> beta = np.random.rand(150, 1)
        >>> round(np.linalg.norm(lr.grad(beta)
        ...       - lr.approx_grad(beta, eps=1e-4)), 10)
        4e-10
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(100, 150)
        >>> y = np.random.randint(0, 2, (100, 1))
        >>> lr = LogisticRegression(X=X, y=y, mean=False)
        >>> beta = np.random.rand(150, 1)
        >>> round(np.linalg.norm(lr.grad(beta)
        ...       - lr.approx_grad(beta, eps=1e-4)), 9)
        3.9e-08
        """
        Xbeta = np.dot(self.X, beta)
        # pi = 1.0 / (1.0 + np.exp(-Xbeta))
        pi = np.reciprocal(1.0 + np.exp(-Xbeta))

        grad = -np.dot(self.X.T, self.weights * (self.y - pi))

        if self.mean:
            grad *= 1.0 / float(self.X.shape[0])

        return grad

    def L(self):
        """Lipschitz constant of the gradient.

        Returns the maximum eigenvalue of (1 / 4) * X'WX.

        From the interface "LipschitzContinuousGradient".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.losses import LogisticRegression
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(10, 15)
        >>> y = np.random.randint(0, 2, (10, 1))
        >>> lr = LogisticRegression(X=X, y=y, mean=True)
        >>> L = lr.L()
        >>> L_ = lr.approx_L((15, 1), 10000)
        >>> L >= L_
        True
        >>> round((L - L_) / L, 15)
        0.45110910457988
        >>> lr = LogisticRegression(X=X, y=y, mean=False)
        >>> L = lr.L()
        >>> L_ = lr.approx_L((15, 1), 10000)
        >>> L >= L_
        True
        >>> round((L - L_) / L, 13)
        0.430306683612
        """
        if self._L is None:
            # pi(x) * (1 - pi(x)) <= 0.25 = 0.5 * 0.5
            PWX = 0.5 * np.sqrt(self.weights) * self.X
            # TODO: Use FastSVD for speedup!
            s = np.linalg.svd(PWX, full_matrices=False, compute_uv=False)
            self._L = np.max(s) ** 2.0  # TODO: CHECK

            if self.mean:
                self._L /= float(self.X.shape[0])

        return self._L

    def step(self, beta, index=0):
        """The step size to use in descent methods.

        Parameters
        ----------
        beta : Numpy array. The point at which to determine the step size.
        """
        return 1.0 / self.L()
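

# Illustrative sketch (not part of the original module): minimising the
# logistic loss above by plain gradient descent with the 1 / L step size, and
# checking that the objective never increases, as guaranteed for a convex
# function with an L-Lipschitz gradient.
def _logistic_regression_descent_sketch():
    """Return True if gradient descent never increased the loss."""
    np.random.seed(42)
    X = np.random.rand(60, 5)
    y = np.random.randint(0, 2, (60, 1))
    lr = LogisticRegression(X=X, y=y, mean=True)
    beta = np.zeros((5, 1))
    previous = lr.f(beta)
    monotone = True
    for _ in range(50):
        beta = beta - lr.step(beta) * lr.grad(beta)
        current = lr.f(beta)
        monotone = monotone and (current <= previous + 1e-12)
        previous = current
    return monotone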


class RidgeLogisticRegression(properties.CompositeFunction,
                              properties.Gradient,
                              properties.LipschitzContinuousGradient,
                              properties.StepSize):
    """The Logistic Regression loss function with a squared L2 penalty.

    Ridge (re-weighted) negative log-likelihood (cross-entropy):

      * f(beta) = -loglik + (k / 2) * ||beta||²_2
                = -Sum wi (yi log(pi) + (1 - yi) log(1 - pi))
                      + (k / 2) * ||beta||²_2
                = -Sum wi (yi xi' beta - log(1 + exp(xi' beta)))
                      + (k / 2) * ||beta||²_2,

      * grad f(beta) = -Sum wi xi (yi - pi) + k * beta,

    where pi = p(y=1 | xi, beta) = 1 / (1 + exp(-xi' beta)) and wi is the
    weight of sample i.

    See [Hastie 2009, pp. 102, 119 and 161; Bishop 2006, p. 206] for details.
    """
    def __init__(self, X, y, k=0.0, weights=None, penalty_start=0, mean=True):
        """
        Parameters
        ----------
        X : Numpy array (n-by-p). The regressor matrix. Training vectors,
                where n is the number of samples and p is the number of
                features.

        y : Numpy array (n-by-1). The regressand vector. Target values (class
                labels in classification).

        k : Non-negative float. The ridge parameter.

        weights : Numpy array (n-by-1). The sample weights.

        penalty_start : Non-negative integer. The number of columns, variables
                etc., to exempt from penalisation. Equivalently, the first
                index to be penalised. Default is 0, all columns are included.

        mean : Boolean. Whether to compute the mean loss or not. Default is
                True, the mean loss is computed.
        """
        self.X = X
        self.y = y
        self.k = max(0.0, float(k))
        if weights is None:
            weights = np.ones(y.shape)  # .reshape(y.shape)
        self.weights = weights
        self.penalty_start = max(0, int(penalty_start))
        self.mean = bool(mean)

        self.reset()

    def reset(self):
        """Free any cached computations from previous use of this Function.

        From the interface "Function".
        """
        self._L = None

    def f(self, beta):
        """Function value of the ridge logistic regression at beta.

        Parameters
        ----------
        beta : Numpy array. Regression coefficient vector. The point at which
                to evaluate the function.
        """
        # TODO: Check the correctness of the re-weighted loglik.
        Xbeta = np.dot(self.X, beta)
        negloglike = -np.sum(self.weights *
                             ((self.y * Xbeta) - np.log(1 + np.exp(Xbeta))))

        if self.mean:
            negloglike *= 1.0 / float(self.X.shape[0])

        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        return negloglike + (self.k / 2.0) * np.sum(beta_ ** 2.0)

    def grad(self, beta):
        """Gradient of the function at beta.

        From the interface "Gradient".

        Parameters
        ----------
        beta : Numpy array. The point at which to evaluate the gradient.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.losses import RidgeLogisticRegression
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(100, 150)
        >>> y = np.random.rand(100, 1)
        >>> y[y < 0.5] = 0.0
        >>> y[y >= 0.5] = 1.0
        >>> rr = RidgeLogisticRegression(X=X, y=y, k=2.71828182, mean=True)
        >>> beta = np.random.rand(150, 1)
        >>> round(np.linalg.norm(rr.grad(beta)
        ...       - rr.approx_grad(beta, eps=1e-4)), 11) < 1e-9
        True
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(100, 150)
        >>> y = np.random.rand(100, 1)
        >>> y[y < 0.5] = 0.0
        >>> y[y >= 0.5] = 1.0
        >>> rr = RidgeLogisticRegression(X=X, y=y, k=2.71828182, mean=False)
        >>> beta = np.random.rand(150, 1)
        >>> np.linalg.norm(rr.grad(beta)
        ...                - rr.approx_grad(beta, eps=1e-4)) < 5e-8
        True
        """
        Xbeta = np.dot(self.X, beta)
        # pi = 1.0 / (1.0 + np.exp(-Xbeta))
        pi = np.reciprocal(1.0 + np.exp(-Xbeta))

        grad = -np.dot(self.X.T, self.weights * (self.y - pi))
        if self.mean:
            grad *= 1.0 / float(self.X.shape[0])

        if self.penalty_start > 0:
            gradL2 = np.vstack((np.zeros((self.penalty_start, 1)),
                                self.k * beta[self.penalty_start:, :]))
        else:
            gradL2 = self.k * beta

        grad = grad + gradL2

        return grad

#        return -np.dot(self.X.T,
#                       np.dot(self.W, (self.y - pi))) \
#                + self.k * beta

    def L(self):
        """Lipschitz constant of the gradient.

        Returns the maximum eigenvalue of (1 / 4) * X'WX, plus k.

        From the interface "LipschitzContinuousGradient".
        """
        if self._L is None:
            # pi(x) * (1 - pi(x)) <= 0.25 = 0.5 * 0.5
            PWX = 0.5 * np.sqrt(self.weights) * self.X  # TODO: CHECK WITH FOUAD
            # PW = 0.5 * np.eye(self.X.shape[0])  # Misses np.sqrt(self.W).
            # PW = 0.5 * np.sqrt(self.W)
            # PWX = np.dot(PW, self.X)
            # TODO: Use FastSVD for speedup!
            s = np.linalg.svd(PWX, full_matrices=False, compute_uv=False)
            self._L = np.max(s) ** 2.0  # TODO: CHECK

            if self.mean:
                self._L /= float(self.X.shape[0])

            self._L += self.k  # TODO: CHECK

        return self._L

    def step(self, beta, index=0):
        """The step size to use in descent methods.

        Parameters
        ----------
        beta : Numpy array. The point at which to determine the step size.
        """
        return 1.0 / self.L()
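

# Illustrative sketch (not part of the original module): with penalty_start=0,
# RidgeLogisticRegression.f is the LogisticRegression loss plus the squared L2
# penalty, which the check below verifies numerically.
def _ridge_logistic_decomposition_sketch():
    """Check f_ridge(beta) == f_logistic(beta) + (k / 2) * ||beta||²_2."""
    np.random.seed(42)
    X = np.random.rand(40, 6)
    y = np.random.randint(0, 2, (40, 1))
    beta = np.random.rand(6, 1)
    k = 0.25
    rlr = RidgeLogisticRegression(X=X, y=y, k=k, mean=True)
    lr = LogisticRegression(X=X, y=y, mean=True)
    penalty = (k / 2.0) * np.sum(beta ** 2.0)
    return abs(rlr.f(beta) - (lr.f(beta) + penalty)) < 5e-12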


class LatentVariableVariance(properties.Function,
                             properties.Gradient,
                             properties.StepSize,
                             properties.LipschitzContinuousGradient):
    """The negative variance of a latent variable t = Xw, i.e. a
    representation of f(w) = -(1 / n) * w'X'Xw, with n the number of samples
    minus one if unbiased=True and the number of samples otherwise.
    """
    # TODO: Handle mean here?
    def __init__(self, X, unbiased=True):
        self.X = X
        if unbiased:
            self._n = float(X.shape[0] - 1.0)
        else:
            self._n = float(X.shape[0])

        self.reset()

    def reset(self):
        self._lambda_max = None

    def f(self, w):
        """Function value.

        From the interface "Function".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.algorithms.nipals import FastSVD
        >>> from parsimony.functions.losses import LatentVariableVariance
        >>>
        >>> np.random.seed(1337)
        >>> X = np.random.rand(50, 150)
        >>> w = np.random.rand(150, 1)
        >>> var = LatentVariableVariance(X)
        >>> round(var.f(w), 12)
        -1295.854475188615
        >>> round(-np.dot(w.T, np.dot(X.T, np.dot(X, w)))[0, 0] / 49.0, 12)
        -1295.854475188615
        """
        Xw = np.dot(self.X, w)
        wXXw = np.dot(Xw.T, Xw)[0, 0]
        return -wXXw / self._n

    def grad(self, w):
        """Gradient of the function.

        From the interface "Gradient".

        Parameters
        ----------
        w : Numpy array. The point at which to evaluate the gradient.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.losses import LatentVariableVariance
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(50, 150)
        >>> var = LatentVariableVariance(X)
        >>> w = np.random.rand(150, 1)
        >>> np.linalg.norm(var.grad(w) - var.approx_grad(w, eps=1e-4)) < 5e-8
        True
        """
        grad = -np.dot(self.X.T, np.dot(self.X, w)) * (2.0 / self._n)

#        approx_grad = utils.approx_grad(f, w, eps=1e-4)
#        print "LatentVariableVariance:", maths.norm(grad - approx_grad)

        return grad

    def L(self):
        """Lipschitz constant of the gradient.

        From the interface "LipschitzContinuousGradient".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.algorithms.nipals import FastSVD
        >>> from parsimony.functions.losses import LatentVariableVariance
        >>>
        >>> np.random.seed(1337)
        >>> X = np.random.rand(50, 150)
        >>> w = np.random.rand(150, 1)
        >>> var = LatentVariableVariance(X)
        >>> round(var.L(), 10)
        47025.0809786841
        >>> _, S, _ = np.linalg.svd(np.dot(X.T, X))
        >>> round(np.max(S) * 49 / 2.0, 10)
        47025.0809786841
        """
        if self._lambda_max is None:
            from parsimony.algorithms.nipals import FastSVD
            v = FastSVD().run(self.X, max_iter=1000)
            us = np.dot(self.X, v)

            self._lambda_max = np.linalg.norm(us) ** 2.0

        return self._n * self._lambda_max / 2.0

    def step(self, w, index=0):
        """The step size to use in descent methods.

        Parameters
        ----------
        w : Numpy array. The point at which to determine the step size.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.algorithms.nipals import FastSVD
        >>> from parsimony.functions.losses import LatentVariableVariance
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(50, 150)
        >>> w = np.random.rand(150, 1)
        >>> var = LatentVariableVariance(X)
        >>> var.step(w)
        2.1979627581251385e-05
        >>> _, S, _ = np.linalg.svd(np.dot(X.T, X))
        >>> round(1.0 / (np.max(S) * 49 / 2.0), 15)
        2.1979627581e-05
        """
        return 1.0 / self.L()
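

# Illustrative sketch (not part of the original module): f(w) equals minus the
# unbiased sample variance of the latent variable t = Xw when the columns of X
# are centred, which the check below verifies numerically.
def _latent_variable_variance_sketch():
    """Check var.f(w) == -var(Xw) for column-centred X."""
    np.random.seed(1337)
    X = np.random.rand(50, 20)
    X = X - X.mean(axis=0)  # Centre the columns so that Xw has zero mean.
    w = np.random.rand(20, 1)
    var = LatentVariableVariance(X, unbiased=True)
    t = np.dot(X, w)
    return abs(var.f(w) + np.var(t, ddof=1)) < 5e-12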


class LinearFunction(properties.CompositeFunction,
                     properties.Gradient,
                     properties.LipschitzContinuousGradient,
                     properties.StepSize):
    """A linear function, f(x) = a'x.
    """
    def __init__(self, a):
        """
        Parameters
        ----------
        a : Numpy array (p-by-1). The slope.
        """
        self.a = a

        self.reset()

    def reset(self):
        """Free any cached computations from previous use of this Function.

        From the interface "Function".
        """
        pass

    def f(self, x):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        x : Numpy array. The point at which to evaluate the function.
        """
        f = np.dot(self.a.T, x)

        return f

    def grad(self, x):
        """Gradient of the function at x.

        From the interface "Gradient".

        Parameters
        ----------
        x : Numpy array. The point at which to evaluate the gradient.
        """
        grad = self.a

        return grad

    def L(self):
        """Lipschitz constant of the gradient.

        From the interface "LipschitzContinuousGradient".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.losses import LinearFunction
        >>>
        >>> np.random.seed(42)
        >>> a = np.random.rand(10, 15)
        >>> f = LinearFunction(a)
        >>> L = f.L()
        >>> L_ = f.approx_L((15, 1), 10)
        >>> L >= L_
        True
        >>> L - L_
        0.0
        """
        return 0.0

    def step(self, beta=None, index=0):
        """The step size to use in descent methods.
        """
        return 1.0


if __name__ == "__main__":
    import doctest
    doctest.testmod()