
Source Code for Module parsimony.functions.penalties

# -*- coding: utf-8 -*-
"""
The :mod:`parsimony.functions.penalties` module contains the penalties used to
constrain the loss functions. These represent mathematical functions and
should thus have properties used by the corresponding algorithms. These
properties are defined in :mod:`parsimony.functions.properties`.

Penalties should be stateless. Penalties may be shared and copied and should
therefore not hold anything that cannot be recomputed the next time they are
called.

Created on Mon Apr 22 10:54:29 2013

Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.

@author:  Tommy Löfstedt, Vincent Guillemot, Edouard Duchesnay and
          Fouad Hadj-Selem
@email:   lofstedt.tommy@gmail.com, edouard.duchesnay@cea.fr
@license: BSD 3-clause.
"""
import numpy as np
import scipy.sparse as sparse

try:
    from . import properties  # Only works when imported as a package.
except ValueError:
    import parsimony.functions.properties as properties  # Run as a script.
import parsimony.utils.maths as maths
import parsimony.utils.consts as consts
import parsimony.utils.linalgs as linalgs

__all__ = ["ZeroFunction", "L1", "L0", "LInf", "L2", "L2Squared",
           "L1L2Squared",
           "QuadraticConstraint", "RGCCAConstraint", "RidgeSquaredError",
           "LinearVariableConstraint",
           "SufficientDescentCondition"]


class ZeroFunction(properties.AtomicFunction,
                   properties.Gradient,
                   properties.Penalty,
                   properties.Constraint,
                   properties.ProximalOperator,
                   properties.ProjectionOperator):

    def __init__(self, l=1.0, c=0.0, penalty_start=0):
        """
        Parameters
        ----------
        l : Non-negative float. The Lagrange multiplier, or regularisation
                constant, of the function.

        c : Float. The limit of the constraint. Since the function is
                identically zero, the constraint is feasible whenever
                c >= 0. The default value is c=0.

        penalty_start : Non-negative integer. The number of columns,
                variables etc., to be exempt from penalisation. Equivalently,
                the first index to be penalised. Default is 0, all columns
                are included.
        """
        self.l = float(l)
        self.c = float(c)
        if self.c < 0.0:
            raise ValueError("A negative constraint parameter does not make "
                             "sense, since the function is always zero.")
        self.penalty_start = int(penalty_start)

        self.reset()

    def reset(self):

        self._zero = None

    def f(self, x):
        """Function value.
        """
        return 0.0

    def grad(self, x):
        """Gradient of the function.

        From the interface "Gradient".
        """
        if self._zero is None:
            self._zero = np.zeros(x.shape)

        return self._zero

    def prox(self, x, factor=1.0, **kwargs):
        """The corresponding proximal operator.

        From the interface "ProximalOperator".
        """
        return x

    def proj(self, x):
        """The corresponding projection operator.

        From the interface "ProjectionOperator".
        """
        return x

    def feasible(self, x):
        """Feasibility of the constraint.

        From the interface "Constraint".
        """
        return self.c >= 0.0


class L1(properties.AtomicFunction,
         properties.Penalty,
         properties.Constraint,
         properties.ProximalOperator,
         properties.ProjectionOperator):
    """The proximal operator of the L1 function with a penalty formulation

        f(\beta) = l * (||\beta||_1 - c),

    where ||\beta||_1 is the L1 norm. The constrained version has the form

        ||\beta||_1 <= c.

    Parameters
    ----------
    l : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the function.

    c : Float. The limit of the constraint. The function is feasible if
            ||\beta||_1 <= c. The default value is c=0, i.e. the default is a
            regularisation formulation.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to be exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.
    """
    def __init__(self, l=1.0, c=0.0, penalty_start=0):

        self.l = float(l)
        self.c = float(c)
        self.penalty_start = int(penalty_start)

    def f(self, beta):
        """Function value.
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        return self.l * (maths.norm1(beta_) - self.c)

    def prox(self, beta, factor=1.0, **kwargs):
        """The corresponding proximal operator.

        From the interface "ProximalOperator".
        """
        l = self.l * factor
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        prox = (np.abs(beta_) > l) * (beta_ - l * np.sign(beta_ - l))

        if self.penalty_start > 0:
            prox = np.vstack((beta[:self.penalty_start, :], prox))

        return prox

    def proj(self, beta, **kwargs):
        """The corresponding projection operator.

        From the interface "ProjectionOperator".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        p = beta_.shape[0]

        abs_beta = np.absolute(beta_)
        norm1 = np.sum(abs_beta)

        if norm1 <= self.c:  # Feasible?
            return beta

        a = np.flipud(np.sort(abs_beta, axis=0)).ravel()
        suma = np.cumsum(a)

        phi = np.zeros((p + 1,))
        np.multiply(a, np.arange(-1, -p - 1, -1), phi[:p])
        phi[:p] += (suma - self.c)
        phi[p] = suma[p - 1] - self.c

        # TODO: BUG: i may be equal to p => IndexError: list index out of
        # range.
        i = np.searchsorted(phi, 0.0)  # First positive (or zero).
        if phi[i] < 0.0:
            # TODO: This should not be able to happen! Do we know it doesn't?
            return self.__proj_old(beta)
        i -= 1  # The last negative phi before positive (or zero).
        if phi[i] >= 0.0:
            # TODO: This should not be able to happen! Do we know it doesn't?
            return self.__proj_old(beta)

        l = a[i] + phi[i] / float(i + 1)  # Find the Lagrange multiplier.

        # The correction by eps is to nudge the L1 norm just below self.c.
        eps = consts.FLOAT_EPSILON
        l += eps

        y = (np.abs(beta_) > l) * (beta_ - l * np.sign(beta_ - l))

        # Put the unregularised variables back.
        if self.penalty_start > 0:
            y = np.vstack((beta[:self.penalty_start, :], y))

        return y

    def __proj_old(self, beta):
        """The corresponding projection operator.

        From the interface "ProjectionOperator".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        abs_beta = np.absolute(beta_)
        norm1 = np.sum(abs_beta)

        if norm1 <= self.c:  # Feasible?
            return beta

        from parsimony.algorithms.explicit import Bisection
        bisection = Bisection(force_negative=True,
                              parameter_positive=True,
                              parameter_negative=False,
                              parameter_zero=False,
                              eps=1e-8)

        class F(properties.Function):
            def __init__(self, beta, c):
                self.beta = beta
                self.c = c

            def f(self, l):
                beta = (abs_beta > l) \
                    * (self.beta - l * np.sign(self.beta - l))

                return maths.norm1(beta) - self.c

        func = F(beta_, self.c)
        l = bisection.run(func, [0.0, np.max(np.abs(beta_))])

        y = (abs_beta > l) * (beta_ - l * np.sign(beta_ - l))

        # Put the unregularised variables back.
        if self.penalty_start > 0:
            y = np.vstack((beta[:self.penalty_start, :], y))

        return y

    def feasible(self, beta):
        """Feasibility of the constraint.

        From the interface "Constraint".

        Parameters
        ----------
        beta : Numpy array. The variable to check for feasibility.
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        return maths.norm1(beta_) <= self.c


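# A minimal usage sketch (not part of the original module): L1.prox above is
# soft thresholding, so it should agree with the closed form
# sign(x) * max(|x| - l, 0). Assumes only numpy and the class above.
#
#     import numpy as np
#     from parsimony.functions.penalties import L1
#
#     np.random.seed(42)
#     x = np.random.rand(5, 1) * 2.0 - 1.0
#     l1 = L1(l=0.3)
#     soft = np.sign(x) * np.maximum(0.0, np.abs(x) - 0.3)
#     assert np.allclose(l1.prox(x), soft)

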
class L0(properties.AtomicFunction,
         properties.Penalty,
         properties.Constraint,
         properties.ProximalOperator,
         properties.ProjectionOperator):
    """The proximal operator of the "pseudo" L0 function

        f(x) = l * (||x||_0 - c),

    where ||x||_0 is the L0 pseudo-norm. The constrained version has the form

        ||x||_0 <= c.

    Warning: Note that this function is not convex, and the regular
    assumptions when using it in e.g. ISTA or FISTA will not apply.
    Nevertheless, it will still converge to a local minimum if we can
    guarantee that we obtain a reduction of the smooth part in each step.
    See e.g.:

        http://eprints.soton.ac.uk/142499/1/BD_NIHT09.pdf
        http://people.ee.duke.edu/~lcarin/blumensath.pdf

    Parameters
    ----------
    l : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the function.

    c : Float. The limit of the constraint. The function is feasible if
            ||x||_0 <= c. The default value is c=0, i.e. the default is a
            regularisation formulation.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to be exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.
    """
    def __init__(self, l=1.0, c=0.0, penalty_start=0):

        self.l = float(l)
        self.c = float(c)
        self.penalty_start = int(penalty_start)

    def f(self, x):
        """Function value.

        From the interface "Function".

        Example
        -------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import L0
        >>> import parsimony.utils.maths as maths
        >>>
        >>> np.random.seed(42)
        >>> x = np.random.rand(10, 1)
        >>> l0 = L0(l=0.5)
        >>> maths.norm0(x)
        10
        >>> l0.f(x) - 0.5 * maths.norm0(x)
        0.0
        >>> x[0, 0] = 0.0
        >>> maths.norm0(x)
        9
        >>> l0.f(x) - 0.5 * maths.norm0(x)
        0.0
        """
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x

        return self.l * (maths.norm0(x_) - self.c)

    def prox(self, x, factor=1.0, **kwargs):
        """The corresponding proximal operator.

        From the interface "ProximalOperator".

        Example
        -------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import L0
        >>> import parsimony.utils.maths as maths
        >>>
        >>> np.random.seed(42)
        >>> x = np.random.rand(10, 1)
        >>> l0 = L0(l=0.5)
        >>> maths.norm0(x)
        10
        >>> l0.prox(x)
        array([[ 0.        ],
               [ 0.95071431],
               [ 0.73199394],
               [ 0.59865848],
               [ 0.        ],
               [ 0.        ],
               [ 0.        ],
               [ 0.86617615],
               [ 0.60111501],
               [ 0.70807258]])
        >>> l0.f(l0.prox(x))
        3.0
        >>> 0.5 * maths.norm0(l0.prox(x))
        3.0
        """
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x

        l = self.l * factor
        prox = x_ * (np.abs(x_) > l)  # Hard thresholding.
        prox = np.vstack((x[:self.penalty_start, :],  # Unregularised part.
                          prox))

        return prox

    def proj(self, x):
        """The corresponding projection operator.

        From the interface "ProjectionOperator".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import L0
        >>>
        >>> np.random.seed(42)
        >>> x = np.random.rand(10, 1) * 2.0 - 1.0
        >>> l0 = L0(c=5.0)
        >>> l0.proj(x)
        array([[ 0.        ],
               [ 0.90142861],
               [ 0.        ],
               [ 0.        ],
               [-0.68796272],
               [-0.68801096],
               [-0.88383278],
               [ 0.73235229],
               [ 0.        ],
               [ 0.        ]])
        """
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x

        if maths.norm0(x_) <= self.c:
            return x

        K = int(np.floor(self.c) + 0.5)
        # Keep the K largest components and zero the rest. (Zeroing only the
        # K smallest, as before, leaves the point infeasible whenever more
        # than 2 * K components are non-zero.)
        ind = np.abs(x_.ravel()).argsort()[:x_.shape[0] - K]
        y = np.copy(x_)
        y[ind] = 0.0

        if self.penalty_start > 0:
            # Add the unregularised variables.
            y = np.vstack((x[:self.penalty_start, :],
                           y))

        return y

    def feasible(self, beta):
        """Feasibility of the constraint.

        From the interface "Constraint".

        Parameters
        ----------
        beta : Numpy array. The variable to check for feasibility.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import L0
        >>>
        >>> np.random.seed(42)
        >>> x = np.random.rand(10, 1) * 2.0 - 1.0
        >>> l0 = L0(c=5.0)
        >>> l0.feasible(x)
        False
        >>> l0.feasible(l0.proj(x))
        True
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        return maths.norm0(beta_) <= self.c


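# A minimal sketch (not part of the original module) contrasting the two
# thresholding rules: L0.prox keeps the surviving entries unchanged (hard
# thresholding), while L1.prox also shrinks the survivors by l.
#
#     import numpy as np
#     from parsimony.functions.penalties import L0, L1
#
#     x = np.array([[1.0], [0.2], [-0.7]])
#     assert np.allclose(L0(l=0.5).prox(x), [[1.0], [0.0], [-0.7]])
#     assert np.allclose(L1(l=0.5).prox(x), [[0.5], [0.0], [-0.2]])

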
class LInf(properties.AtomicFunction,
           properties.Penalty,
           properties.Constraint,
           properties.ProximalOperator,
           properties.ProjectionOperator):
    """The proximal operator of the L-infinity function

        f(x) = l * (||x||_inf - c),

    where ||x||_inf is the L-infinity norm. The constrained version has the
    form

        ||x||_inf <= c.

    Parameters
    ----------
    l : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the function.

    c : Float. The limit of the constraint. The function is feasible if
            ||x||_inf <= c. The default value is c=0, i.e. the default is a
            regularisation formulation.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to be exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.
    """
    def __init__(self, l=1.0, c=0.0, penalty_start=0):

        self.l = float(l)
        self.c = float(c)
        self.penalty_start = int(penalty_start)

    def f(self, x):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        x : Numpy array. The point at which to evaluate the function.

        Example
        -------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import LInf
        >>> import parsimony.utils.maths as maths
        >>>
        >>> np.random.seed(42)
        >>> x = np.random.rand(10, 1)
        >>> linf = LInf(l=1.1)
        >>> linf.f(x) - 1.1 * maths.normInf(x)
        0.0
        """
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x

        return self.l * (maths.normInf(x_) - self.c)

    def prox(self, x, factor=1.0, **kwargs):
        """The corresponding proximal operator.

        From the interface "ProximalOperator".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import LInf
        >>> import parsimony.utils.maths as maths
        >>>
        >>> np.random.seed(42)
        >>> x = np.random.rand(10, 1)
        >>> linf = LInf(l=1.45673045, c=0.5)
        >>> linf_prox = linf.prox(x)
        >>> linf_prox
        array([[ 0.37454012],
               [ 0.5       ],
               [ 0.5       ],
               [ 0.5       ],
               [ 0.15601864],
               [ 0.15599452],
               [ 0.05808361],
               [ 0.5       ],
               [ 0.5       ],
               [ 0.5       ]])
        >>> linf_proj = linf.proj(x)
        >>> linf_proj
        array([[ 0.37454012],
               [ 0.5       ],
               [ 0.5       ],
               [ 0.5       ],
               [ 0.15601864],
               [ 0.15599452],
               [ 0.05808361],
               [ 0.5       ],
               [ 0.5       ],
               [ 0.5       ]])
        >>> np.linalg.norm(linf_prox - linf_proj)
        7.2392821740411278e-09
        """
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x

        l = self.l * factor
        l1 = L1(c=l)  # Project onto an L1 ball with radius c=l.
        y = x_ - l1.proj(x_)  # Moreau decomposition.
        # TODO: Check if this is correct!

        # Put the unregularised variables back.
        if self.penalty_start > 0:
            y = np.vstack((x[:self.penalty_start, :],
                           y))

        return y

    def proj(self, x):
        """The corresponding projection operator.

        From the interface "ProjectionOperator".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import LInf
        >>>
        >>> np.random.seed(42)
        >>> x = np.random.rand(10, 1) * 2.0 - 1.0
        >>> linf = LInf(c=0.618)
        >>> linf.proj(x)
        array([[-0.25091976],
               [ 0.618     ],
               [ 0.46398788],
               [ 0.19731697],
               [-0.618     ],
               [-0.618     ],
               [-0.618     ],
               [ 0.618     ],
               [ 0.20223002],
               [ 0.41614516]])
        """
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x

        if maths.normInf(x_) <= self.c:
            return x

        y = np.copy(x_)
        y[y > self.c] = self.c
        y[y < -self.c] = -self.c

        # Put the unregularised variables back.
        if self.penalty_start > 0:
            y = np.vstack((x[:self.penalty_start, :],
                           y))

        return y

    def feasible(self, x):
        """Feasibility of the constraint.

        From the interface "Constraint".

        Parameters
        ----------
        x : Numpy array. The variable to check for feasibility.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import LInf
        >>>
        >>> np.random.seed(42)
        >>> x = np.random.rand(10, 1) * 2.0 - 1.0
        >>> linf = LInf(c=0.618)
        >>> linf.feasible(x)
        False
        >>> linf.feasible(linf.proj(x))
        True
        """
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x

        return maths.normInf(x_) <= self.c


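# A sketch (not part of the original module) of the Moreau decomposition used
# by LInf.prox above: the prox of l * ||.||_inf plus the projection onto the
# L1 ball of radius l recovers the input exactly.
#
#     import numpy as np
#     from parsimony.functions.penalties import LInf, L1
#
#     np.random.seed(0)
#     x = np.random.randn(6, 1)
#     l = 0.75
#     assert np.allclose(LInf(l=l).prox(x) + L1(c=l).proj(x), x)

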
class L2(properties.AtomicFunction,
         properties.Penalty,
         properties.Constraint,
         properties.ProximalOperator,
         properties.ProjectionOperator):
    """The proximal operator of the L2 function with a penalty formulation

        f(\beta) = l * (||\beta||_2 - c),

    where ||\beta||_2 is the L2 norm. The constrained version has the form

        ||\beta||_2 <= c.

    Parameters
    ----------
    l : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the function.

    c : Float. The limit of the constraint. The function is feasible if
            ||\beta||_2 <= c. The default value is c=0, i.e. the default is
            a regularised formulation.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to be exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.
    """
    def __init__(self, l=1.0, c=0.0, penalty_start=0):

        self.l = float(l)
        self.c = float(c)
        self.penalty_start = int(penalty_start)

    def f(self, beta):
        """Function value.

        From the interface "Function".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        return self.l * (maths.norm(beta_) - self.c)

    def prox(self, beta, factor=1.0, **kwargs):
        """The corresponding proximal operator.

        From the interface "ProximalOperator".
        """
        l = self.l * factor
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        norm = maths.norm(beta_)
        if norm >= l:
            # Block soft thresholding. Note: do not update beta_ in place;
            # that would modify the caller's array.
            beta_ = (1.0 - l / norm) * beta_
        else:
            beta_ = np.zeros(beta_.shape)

        if self.penalty_start > 0:
            prox = np.vstack((beta[:self.penalty_start, :], beta_))
        else:
            prox = beta_

        return prox

    def proj(self, beta, **kwargs):
        """The corresponding projection operator.

        From the interface "ProjectionOperator".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import L2
        >>> np.random.seed(42)
        >>> l2 = L2(c=0.3183098861837907)
        >>> y1 = l2.proj(np.random.rand(100, 1) * 2.0 - 1.0)
        >>> round(np.linalg.norm(y1), 15)
        0.318309886183791
        >>> y2 = np.random.rand(100, 1) * 2.0 - 1.0
        >>> l2.feasible(y2)
        False
        >>> l2.feasible(l2.proj(y2))
        True
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        norm = maths.norm(beta_)

        # Feasible?
        if norm <= self.c:
            return beta

        # The correction by eps is to nudge the norm just below self.c.
        eps = consts.FLOAT_EPSILON
        beta_ = beta_ * (self.c / (norm + eps))

        if self.penalty_start > 0:
            proj = np.vstack((beta[:self.penalty_start, :], beta_))
        else:
            proj = beta_

        return proj

    def feasible(self, beta):
        """Feasibility of the constraint.

        From the interface "Constraint".

        Parameters
        ----------
        beta : Numpy array. The variable to check for feasibility.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import L2
        >>> np.random.seed(42)
        >>> l2 = L2(c=0.3183098861837907)
        >>> y1 = 0.01 * (np.random.rand(50, 1) * 2.0 - 1.0)
        >>> l2.feasible(y1)
        True
        >>> y2 = 10.0 * (np.random.rand(50, 1) * 2.0 - 1.0)
        >>> l2.feasible(y2)
        False
        >>> y3 = l2.proj(50.0 * np.random.rand(100, 1) * 2.0 - 1.0)
        >>> l2.feasible(y3)
        True
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        return maths.norm(beta_) <= self.c + consts.FLOAT_EPSILON


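# A sketch (not part of the original module): L2.prox is block soft
# thresholding, i.e. prox(x) = max(0, 1 - l / ||x||_2) * x.
#
#     import numpy as np
#     from parsimony.functions.penalties import L2
#
#     np.random.seed(1)
#     x = np.random.randn(4, 1)
#     l = 0.5
#     norm = np.linalg.norm(x)
#     assert np.allclose(L2(l=l).prox(x), max(0.0, 1.0 - l / norm) * x)

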
class L2Squared(properties.AtomicFunction,
                properties.Gradient,
                properties.LipschitzContinuousGradient,
                properties.Penalty,
                properties.Constraint,
                properties.ProximalOperator,
                properties.ProjectionOperator):
    """The proximal operator of the squared L2 function with a penalty
    formulation

        f(\beta) = l * (0.5 * ||\beta||²_2 - c),

    where ||\beta||²_2 is the squared L2 norm. The constrained version has
    the form

        0.5 * ||\beta||²_2 <= c.

    Parameters
    ----------
    l : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the function.

    c : Float. The limit of the constraint. The function is feasible if
            0.5 * ||\beta||²_2 <= c. The default value is c=0, i.e. the
            default is a regularised formulation.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to be exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.
    """
    def __init__(self, l=1.0, c=0.0, penalty_start=0):

        self.l = float(l)
        self.c = float(c)
        self.penalty_start = int(penalty_start)

    def f(self, beta):
        """Function value.

        From the interface "Function".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        return self.l * (0.5 * np.dot(beta_.T, beta_)[0, 0] - self.c)

    def grad(self, beta):
        """Gradient of the function.

        From the interface "Gradient".

        Example
        -------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import L2Squared
        >>>
        >>> np.random.seed(42)
        >>> beta = np.random.rand(100, 1)
        >>> l2 = L2Squared(l=3.14159, c=2.71828)
        >>> np.linalg.norm(l2.grad(beta)
        ...                - l2.approx_grad(beta, eps=1e-4)) < 5e-10
        True
        >>>
        >>> l2 = L2Squared(l=3.14159, c=2.71828, penalty_start=5)
        >>> np.linalg.norm(l2.grad(beta)
        ...                - l2.approx_grad(beta, eps=1e-4)) < 5e-10
        True
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
            grad = np.vstack((np.zeros((self.penalty_start, 1)),
                              self.l * beta_))
        else:
            beta_ = beta
            grad = self.l * beta_

#        approx_grad = utils.approx_grad(self.f, beta, eps=1e-4)
#        print maths.norm(grad - approx_grad)

        return grad

    def L(self):
        """Lipschitz constant of the gradient.
        """
        return self.l

    def prox(self, beta, factor=1.0, **kwargs):
        """The corresponding proximal operator.

        From the interface "ProximalOperator".
        """
        l = self.l * factor
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
            prox = np.vstack((beta[:self.penalty_start, :],
                              beta_ * (1.0 / (1.0 + l))))
        else:
            prox = beta * (1.0 / (1.0 + l))

        return prox

    def proj(self, beta, **kwargs):
        """The corresponding projection operator.

        From the interface "ProjectionOperator".

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import L2Squared
        >>> np.random.seed(42)
        >>> l2 = L2Squared(c=0.3183098861837907)
        >>> y1 = l2.proj(np.random.rand(100, 1) * 2.0 - 1.0)
        >>> round(0.5 * np.linalg.norm(y1) ** 2.0, 15)
        0.318309886183791
        >>> y2 = np.random.rand(100, 1) * 2.0 - 1.0
        >>> l2.feasible(y2)
        False
        >>> l2.feasible(l2.proj(y2))
        True
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        sqnorm = np.dot(beta_.T, beta_)[0, 0]

        # Feasible?
        if 0.5 * sqnorm <= self.c:
            return beta

        # The correction by eps is to nudge the squared norm just below
        # self.c.
        eps = consts.FLOAT_EPSILON
        if self.penalty_start > 0:
            proj = np.vstack((beta[:self.penalty_start, :],
                              beta_ * np.sqrt((2.0 * self.c - eps)
                                              / sqnorm)))
        else:
            proj = beta_ * np.sqrt((2.0 * self.c - eps) / sqnorm)

        return proj

    def feasible(self, beta):
        """Feasibility of the constraint.

        From the interface "Constraint".

        Parameters
        ----------
        beta : Numpy array. The variable to check for feasibility.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.penalties import L2Squared
        >>> np.random.seed(42)
        >>> l2 = L2Squared(c=0.3183098861837907)
        >>> y1 = 0.1 * (np.random.rand(50, 1) * 2.0 - 1.0)
        >>> l2.feasible(y1)
        True
        >>> y2 = 10.0 * (np.random.rand(50, 1) * 2.0 - 1.0)
        >>> l2.feasible(y2)
        False
        >>> y3 = l2.proj(50.0 * np.random.rand(100, 1) * 2.0 - 1.0)
        >>> l2.feasible(y3)
        True
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        sqnorm = np.dot(beta_.T, beta_)[0, 0]

        return 0.5 * sqnorm <= self.c + consts.FLOAT_EPSILON


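# A sketch (not part of the original module): the prox of the squared L2
# penalty is plain shrinkage, prox(x) = x / (1 + l).
#
#     import numpy as np
#     from parsimony.functions.penalties import L2Squared
#
#     x = np.array([[3.0], [-1.5]])
#     assert np.allclose(L2Squared(l=2.0).prox(x), x / 3.0)

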
class L1L2Squared(properties.AtomicFunction,
                  properties.Penalty,
                  properties.ProximalOperator):
    """The proximal operator of the L1 function with an L2 constraint.
    The function is

        f(x) = l1 * ||x||_1 + Indicator(||x||²_2 <= l2),

    where ||.||_1 is the L1 norm and ||.||²_2 is the squared L2 norm.

    Parameters
    ----------
    l1 : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the L1 norm penalty.

    l2 : Non-negative float. The limit of the constraint of the squared L2
            norm penalty.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to be exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.
    """
    def __init__(self, l1=1.0, l2=1.0, penalty_start=0):

        self.l1 = max(0.0, float(l1))
        self.l2 = max(0.0, float(l2))
        self.penalty_start = max(0, int(penalty_start))

    def f(self, beta):
        """Function value.
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        if maths.norm(beta_) ** 2.0 > self.l2:
            return consts.FLOAT_INF

        return self.l1 * maths.norm1(beta_)

    def prox(self, beta, factor=1.0, **kwargs):
        """The corresponding proximal operator.

        From the interface "ProximalOperator".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        l1 = self.l1 * factor
        prox = (np.abs(beta_) > l1) * (beta_ - l1 * np.sign(beta_ - l1))
        # Rescale onto the L2 ball, but only if the soft-thresholded point
        # is infeasible; rescaling a feasible point would needlessly move it
        # to the boundary.
        sqnorm = np.dot(prox.T, prox)[0, 0]
        if sqnorm > self.l2:
            prox *= np.sqrt(self.l2 / sqnorm)

        if self.penalty_start > 0:
            prox = np.vstack((beta[:self.penalty_start, :], prox))

        return prox


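# A sketch (not part of the original module): L1L2Squared.prox first
# soft-thresholds and then rescales onto the L2 ball when the thresholded
# point is infeasible; here ||y||²_2 lands on the boundary l2 = 1.
#
#     import numpy as np
#     from parsimony.functions.penalties import L1L2Squared
#
#     x = np.array([[2.0], [-3.0], [0.1]])
#     y = L1L2Squared(l1=0.5, l2=1.0).prox(x)
#     assert np.isclose(np.dot(y.T, y)[0, 0], 1.0)

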
class QuadraticConstraint(properties.AtomicFunction,
                          properties.Gradient,
                          properties.Penalty,
                          properties.Constraint):
    """Represents the quadratic function

        f(x) = l * (x'Mx - c),

    or

        f(x) = l * (x'M'Nx - c),

    where M, or M'N, is a given symmetric positive-definite matrix. The
    constrained version has the form

        x'Mx <= c,

    or

        x'M'Nx <= c

    if two matrices are given.

    Parameters
    ----------
    l : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the function.

    c : Float. The limit of the constraint. The function is feasible if
            x'Mx <= c. The default value is c=0, i.e. the default is a
            regularisation formulation.

    M : Numpy array. The given positive-definite matrix. It is assumed that
            the first penalty_start columns must be excluded.

    N : Numpy array. The second matrix if the factors of the
            positive-definite matrix are given. It is assumed that the first
            penalty_start columns must be excluded.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to be exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.
    """
    def __init__(self, l=1.0, c=0.0, M=None, N=None, penalty_start=0):

        self.l = float(l)
        self.c = float(c)
        # Set penalty_start before it is used to slice M and N below.
        self.penalty_start = max(0, int(penalty_start))
        if self.penalty_start > 0:
            self.M = M[:, self.penalty_start:]  # NOTE! We slice M here!
            if N is not None:
                self.N = N[:, self.penalty_start:]  # NOTE! We slice N here!
            else:
                self.N = None
        else:
            self.M = M
            self.N = N

    def f(self, beta):
        """Function value.
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        if self.N is None:
            val = self.l * (np.dot(beta_.T, np.dot(self.M, beta_)) - self.c)
        else:
            val = self.l * (np.dot(beta_.T, np.dot(self.M.T,
                                                   np.dot(self.N, beta_)))
                            - self.c)

        return val

    def grad(self, beta):
        """Gradient of the function.

        From the interface "Gradient".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        if self.N is None:
            grad = (2.0 * self.l) * np.dot(self.M, beta_)
        else:
            grad = (2.0 * self.l) * np.dot(self.M.T, np.dot(self.N, beta_))

        if self.penalty_start > 0:
            grad = np.vstack((np.zeros((self.penalty_start, 1)), grad))

        return grad

    def feasible(self, beta):
        """Feasibility of the constraint.

        From the interface "Constraint".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        if self.N is None:
            bMb = np.dot(beta_.T, np.dot(self.M, beta_))
        else:
            bMb = np.dot(beta_.T, np.dot(self.M.T, np.dot(self.N, beta_)))

        return bMb <= self.c


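# A sketch (not part of the original module): for M = A'A the gradient is
# 2 * l * M * x, and the factored form (M=A, N=A) gives the same result.
#
#     import numpy as np
#     from parsimony.functions.penalties import QuadraticConstraint
#
#     np.random.seed(1)
#     A = np.random.randn(7, 5)
#     M = np.dot(A.T, A)
#     x = np.random.randn(5, 1)
#     qc = QuadraticConstraint(l=1.0, c=0.0, M=M)
#     assert np.allclose(qc.grad(x), 2.0 * np.dot(M, x))
#     qc2 = QuadraticConstraint(l=1.0, c=0.0, M=A, N=A)
#     assert np.allclose(qc2.grad(x), qc.grad(x))

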
class RGCCAConstraint(QuadraticConstraint,
                      properties.ProjectionOperator):
    """Represents the quadratic function

        f(x) = l * (x'(tau * I + ((1 - tau) / n) * X'X)x - c),

    where tau is a given regularisation constant. The constrained version
    has the form

        x'(tau * I + ((1 - tau) / n) * X'X)x <= c.

    Parameters
    ----------
    l : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the function.

    c : Float. The limit of the constraint. The function is feasible if
            x'(tau * I + ((1 - tau) / n) * X'X)x <= c. The default value is
            c=0, i.e. the default is a regularisation formulation.

    tau : Non-negative float. The regularisation constant.

    X : Numpy array, n-by-p. The associated data matrix. The first
            penalty_start columns will be excluded.

    unbiased : Boolean. Whether the sample variance should be unbiased or
            not. Default is True, i.e. unbiased.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to be exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.
    """
    def __init__(self, l=1.0, c=0.0, tau=1.0, X=None, unbiased=True,
                 penalty_start=0):

        self.l = max(0.0, float(l))
        self.c = float(c)
        self.tau = max(0.0, min(float(tau), 1.0))
        if penalty_start > 0:
            self.X = X[:, penalty_start:]  # NOTE! We slice X here!
        else:
            self.X = X
        self.unbiased = bool(unbiased)
        self.penalty_start = max(0, int(penalty_start))

        self.reset()

    def reset(self):

        self._U = None
        self._S = None
        self._V = None

    def f(self, beta):
        """Function value.
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        xtMx = self._compute_value(beta_)

        return self.l * (xtMx - self.c)

    def grad(self, beta):
        """Gradient of the function.

        From the interface "Gradient".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        if self.unbiased:
            n = float(self.X.shape[0] - 1.0)
        else:
            n = float(self.X.shape[0])

        if self.tau < 1.0:
            XtXbeta = np.dot(self.X.T, np.dot(self.X, beta_))
            grad = (self.tau * 2.0) * beta_ \
                 + ((1.0 - self.tau) * 2.0 / n) * XtXbeta
        else:
            grad = (self.tau * 2.0) * beta_

        if self.penalty_start > 0:
            grad = np.vstack((np.zeros((self.penalty_start, 1)),
                              grad))

#        approx_grad = utils.approx_grad(self.f, beta, eps=1e-4)
#        print maths.norm(grad - approx_grad)

        return grad

    def feasible(self, beta):
        """Feasibility of the constraint.

        From the interface "Constraint".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        xtMx = self._compute_value(beta_)

        return xtMx <= self.c

    def proj(self, beta, **kwargs):
        """The projection operator corresponding to the function.

        From the interface "ProjectionOperator".

        Examples
        --------
        >>> import parsimony.functions.penalties as penalties
        >>> import parsimony.utils.consts as consts
        >>> import numpy as np
        >>> np.random.seed(42)
        >>>
        >>> X = np.random.randn(10, 10)
        >>> x = np.random.randn(10, 1)
        >>> L2 = penalties.RGCCAConstraint(c=1.0, tau=1.0, X=X, unbiased=True)
        >>> L2.f(x)
        5.7906381220390024
        >>> y = L2.proj(x)
        >>> abs(L2.f(y)) <= 2.0 * consts.FLOAT_EPSILON
        True
        >>> np.linalg.norm(y)
        0.99999999999999989
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        xtMx = self._compute_value(beta_)
        if xtMx <= self.c + consts.FLOAT_EPSILON:
            return beta

        n, p = self.X.shape

        if self.unbiased:
            n_ = float(n - 1.0)
        else:
            n_ = float(n)

        if self.tau == 1.0:

            sqnorm = np.dot(beta_.T, beta_)
            eps = consts.FLOAT_EPSILON
            y = beta_ * np.sqrt((self.c - eps) / sqnorm)

        else:

            if self._U is None or self._S is None or self._V is None:
#                self._U, self._S, self._V = np.linalg.svd(X_,
#                                                          full_matrices=0)
                # numpy.linalg.svd runs faster on the transpose.
                self._V, self._S, self._U = np.linalg.svd(self.X.T,
                                                          full_matrices=0)
                self._V = self._V.T
                self._U = self._U.T
                self._S = ((1.0 - self.tau) / n_) * (self._S ** 2.0) \
                    + self.tau
                self._S = self._S.reshape((min(n, p), 1))

            atilde = np.dot(self._V, beta_)
            atilde2 = atilde ** 2.0
            ssdiff = np.dot(beta_.T, beta_)[0, 0] - np.sum(atilde2)
            atilde2lambdas = atilde2 * self._S
            atilde2lambdas2 = atilde2lambdas * self._S
            tau2 = self.tau ** 2.0

            from parsimony.algorithms.utils import NewtonRaphson
            newton = NewtonRaphson(force_negative=True,
                                   parameter_positive=True,
                                   parameter_negative=False,
                                   parameter_zero=True,
                                   eps=consts.TOLERANCE,
                                   max_iter=30)

            class F(properties.Function,
                    properties.Gradient):

                def __init__(self, tau, S, c):
                    self.tau = tau
                    self.S = S
                    self.c = c

                    self.precomp = None
                    self.precomp_mu = None

                def f(self, mu):
                    term1 = (self.tau
                             / ((1.0 + 2.0 * mu * self.tau) ** 2.0)) * ssdiff

                    self.precomp = 1.0 + (2.0 * mu) * self.S
                    self.precomp_mu = mu

                    term2 = np.sum(atilde2lambdas * (self.precomp ** -2.0))

                    return term1 + term2 - self.c

                def grad(self, mu):
                    term1 = (-4.0 * tau2
                             / ((1.0 + 2.0 * mu * self.tau) ** 3.0)) * ssdiff

                    if self.precomp is None or self.precomp_mu != mu:
                        self.precomp = 1.0 + (2.0 * mu) * self.S

                    term2 = -4.0 * np.sum(atilde2lambdas2
                                          * (self.precomp ** -3.0))

                    self.precomp = None
                    self.precomp_mu = None

                    return term1 + term2

#            if max(n, p) >= 1000:
#                # A rough heuristic for finding a start value. Works well in
#                # many cases, and when it does not work we have only lost
#                # one iteration and restart at 0.0.
#                start_mu = np.sqrt(min(n, p)) \
#                        / max(1.0, self.c) \
#                        / max(0.1, self.tau)
#            elif max(n, p) >= 100:
#                start_mu = 1.0
#            else:
            start_mu = 0.0
            mu = newton.run(F(self.tau, self._S, self.c), start_mu)

            # Seems to be possible because of machine precision.
            if mu <= consts.FLOAT_EPSILON:
                return beta

            if p > n:
                l2 = ((self._S - self.tau)
                      * (1.0 / ((1.0 - self.tau) / n_))).reshape((n,))

                a = 1.0 + 2.0 * mu * self.tau
                b = 2.0 * mu * (1.0 - self.tau) / n_
                y = (beta_ - np.dot(self.X.T, np.dot(self._U,
                        (np.reciprocal(l2 + (a / b))
                         * np.dot(self._U.T,
                                  np.dot(self.X, beta_)).T).T))) * (1. / a)

            else:  # The case when n >= p.
                l2 = ((self._S - self.tau)
                      * (1.0 / ((1.0 - self.tau) / n_))).reshape((p,))

                a = 1.0 + 2.0 * mu * self.tau
                b = 2.0 * mu * (1.0 - self.tau) / n_
                y = np.dot(self._V.T, (np.reciprocal(a + b * l2)
                                       * atilde.T).T)

        if self.penalty_start > 0:
            y = np.vstack((beta[:self.penalty_start, :],
                           y))

        return y

    def _compute_value(self, beta):
        """Helper function to compute the function value.

        Note that beta must already be sliced!
        """
        if self.unbiased:
            n = float(self.X.shape[0] - 1.0)
        else:
            n = float(self.X.shape[0])

        Xbeta = np.dot(self.X, beta)
        val = self.tau * np.dot(beta.T, beta) \
            + ((1.0 - self.tau) / n) * np.dot(Xbeta.T, Xbeta)

        return val[0, 0]


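# A sketch (not part of the original module): with tau=1 the RGCCA constraint
# function reduces to l * (||x||²_2 - c); with 0 < tau < 1 it interpolates
# between that and the (unbiased) sample covariance term.
#
#     import numpy as np
#     from parsimony.functions.penalties import RGCCAConstraint
#
#     np.random.seed(2)
#     X = np.random.randn(8, 4)
#     x = np.random.randn(4, 1)
#     rgcca = RGCCAConstraint(l=1.0, c=1.0, tau=1.0, X=X)
#     assert np.isclose(rgcca.f(x), np.dot(x.T, x)[0, 0] - 1.0)
#     rgcca2 = RGCCAConstraint(l=1.0, c=0.0, tau=0.5, X=X, unbiased=True)
#     Xx = np.dot(X, x)
#     val = 0.5 * np.dot(x.T, x)[0, 0] + (0.5 / 7.0) * np.dot(Xx.T, Xx)[0, 0]
#     assert np.isclose(rgcca2.f(x), val)

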
class RidgeSquaredError(properties.CompositeFunction,
                        properties.Gradient,
                        properties.StronglyConvex,
                        properties.Penalty,
                        properties.ProximalOperator):
    """Represents a ridge squared error penalty, i.e. a representation of

        f(x) = l * ((1 / (2 * n)) * ||Xb - y||²_2 + (k / 2) * ||b||²_2),

    where ||.||²_2 is the squared L2 norm.

    Parameters
    ----------
    l : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the function.

    X : Numpy array (n-by-p). The regressor matrix.

    y : Numpy array (n-by-1). The regressand vector.

    k : Non-negative float. The ridge parameter.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to be exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.

    mean : Boolean. Whether to compute the squared loss or the mean squared
            loss. Default is True, the mean squared loss.
    """
    def __init__(self, X, y, k, l=1.0, penalty_start=0, mean=True):

        self.l = max(0.0, float(l))
        self.X = X
        self.y = y
        self.k = max(0.0, float(k))

        self.penalty_start = max(0, int(penalty_start))
        self.mean = bool(mean)

        self.reset()

    def reset(self):
        """Free any cached computations from previous use of this Function.

        From the interface "Function".
        """
        self._Xty = None
        self._s2 = None
        self._V = None
        self._lambda_max = None
        self._lambda_min = None

    def f(self, x):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        x : Numpy array. Regression coefficient vector. The point at which
                to evaluate the function.
        """
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
#            Xx = np.dot(self.X[:, self.penalty_start:], x_)
            Xx_ = np.dot(self.X, x) \
                - np.dot(self.X[:, :self.penalty_start],
                         x[:self.penalty_start, :])
#            print "penalties.RidgeSquaredError, DIFF:", \
#                    np.linalg.norm(Xx - Xx_)
        else:
            x_ = x
            Xx_ = np.dot(self.X, x_)

        if self.mean:
            d = 2.0 * float(self.X.shape[0])
        else:
            d = 2.0

        f = (1.0 / d) * np.sum((Xx_ - self.y) ** 2.0) \
            + (self.k / 2.0) * np.sum(x_ ** 2.0)

        return self.l * f

    def grad(self, x):
        """Gradient of the function at beta.

        From the interface "Gradient".

        Parameters
        ----------
        x : Numpy array. The point at which to evaluate the gradient.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.losses import RidgeRegression
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(100, 150)
        >>> y = np.random.rand(100, 1)
        >>> rr = RidgeRegression(X=X, y=y, k=3.14159265)
        >>> beta = np.random.rand(150, 1)
        >>> round(np.linalg.norm(rr.grad(beta)
        ...       - rr.approx_grad(beta, eps=1e-4)), 9)
        1.3e-08
        """
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
            X_ = self.X[:, self.penalty_start:]
            grad = np.dot(X_.T, np.dot(X_, x_) - self.y)
            del X_
        else:
            x_ = x
            grad = np.dot((np.dot(self.X, x_) - self.y).T, self.X).T

        if self.mean:
            grad *= 1.0 / float(self.X.shape[0])

        grad += self.k * x_

        if self.penalty_start > 0:
            grad = np.vstack((np.zeros((self.penalty_start, 1)),
                              self.l * grad))
        else:
            grad *= self.l

        return grad

    def L(self):
        """Lipschitz constant of the gradient.

        From the interface "LipschitzContinuousGradient".
        """
        if self._lambda_max is None:
            s = np.linalg.svd(self.X, full_matrices=False, compute_uv=False)

            self._lambda_max = np.max(s) ** 2.0

            if len(s) < self.X.shape[1]:
                self._lambda_min = 0.0
            else:
                self._lambda_min = np.min(s) ** 2.0

            if self.mean:
                self._lambda_max /= float(self.X.shape[0])
                self._lambda_min /= float(self.X.shape[0])

        return self.l * (self._lambda_max + self.k)

    def parameter(self):
        """Returns the strongly convex parameter for the function.

        From the interface "StronglyConvex".
        """
        if self._lambda_min is None:
            self._lambda_max = None
            self.L()  # Precompute.

        return self.l * (self._lambda_min + self.k)

    def prox(self, x, factor=1.0, eps=consts.TOLERANCE, max_iter=100):
        """The proximal operator associated to this function.

        Parameters
        ----------
        x : Numpy array (p-by-1). The point at which to apply the proximal
                operator.

        factor : Positive float. A factor by which the Lagrange multiplier
                is scaled. This is usually the step size.

        eps : Positive float. This is the stopping criterion for inexact
                proximal methods, where the proximal operator is
                approximated numerically.

        max_iter : Positive integer. This is the maximum number of
                iterations for inexact proximal methods, where the proximal
                operator is approximated numerically.

        From the interface "ProximalOperator".
        """
        # y = inv(X'.X + (k + 1 / l).I).((1 / l).x + X'.v)
        n, p = self.X.shape
        rho = 1.0 / self.l

        if self._Xty is None:
            self._Xty = np.dot(self.X.T, self.y)

        v = rho * x + self._Xty
        c = self.k + rho

        if n >= p:
            if self._s2 is None or self._V is None:
#                # Ridge solution
#                XtX_klI = np.dot(self.X.T, self.X)
#                index = np.arange(min(XtX_klI.shape))
#                XtX_klI[index, index] += c
#                self._inv_XtX_klI = np.linalg.inv(XtX_klI)

                _, self._s2, self._V = np.linalg.svd(self.X)
                self._V = self._V.T
                self._s2 = self._s2.reshape((p, 1)) ** 2.0
#                _inv_XtX_klI = np.dot(V, np.reciprocal(c + s ** 2) * V.T)

#            y = np.dot(self._inv_XtX_klI, v)
            y = np.dot(self._V,
                       np.reciprocal(c + self._s2) * np.dot(self._V.T, v))

        else:  # If n < p.
            if self._s2 is None or self._V is None:
#                # Ridge solution using the Woodbury matrix identity.
#                XXt_klI = np.dot(self.X, self.X.T)
#                index = np.arange(min(XXt_klI.shape))
#                XXt_klI[index, index] += c
#                self._inv_XtX_klI = np.linalg.inv(XXt_klI)

                _, self._s2, self._V = np.linalg.svd(self.X.T)
                self._V = self._V.T
                self._s2 = self._s2.reshape((n, 1)) ** 2.0
#                _inv_XtX_klI = np.dot(V, np.reciprocal(c + s ** 2) * V.T)

#            y = (v - np.dot(self.X.T, np.dot(self._inv_XtX_klI,
#                                             np.dot(self.X, v)))) / c
            Xv = np.dot(self.X, v)
            y = (v - np.dot(self.X.T, np.dot(self._V,
                                             np.reciprocal(c + self._s2)
                                             * np.dot(self._V.T, Xv)))) \
                * (1.0 / c)

        return y


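# A sketch (not part of the original module): with l=1 and mean=False,
# RidgeSquaredError.prox solves the linear system
# (X'X + (k + 1) I) y = x + X't, which we can verify directly.
#
#     import numpy as np
#     from parsimony.functions.penalties import RidgeSquaredError
#
#     np.random.seed(3)
#     X = np.random.randn(10, 6)
#     t = np.random.randn(10, 1)
#     ridge = RidgeSquaredError(X=X, y=t, k=0.5, l=1.0, mean=False)
#     x = np.random.randn(6, 1)
#     y_prox = ridge.prox(x)
#     lhs = np.dot(X.T, np.dot(X, y_prox)) + 1.5 * y_prox
#     assert np.allclose(lhs, x + np.dot(X.T, t))

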
class LinearVariableConstraint(properties.IndicatorFunction,
                               properties.Constraint,
                               properties.ProjectionOperator):
    """Represents a linear constraint

        r = Ax,

    where both x and r are variables.

    Parameters
    ----------
    A : Numpy or sparse scipy array. The linear map between x and r.
    """
    def __init__(self, A, penalty_start=0, solver=linalgs.SparseSolver()):

        self.A = A

        self.penalty_start = max(0, int(penalty_start))

        self.solver = solver

        self.reset()

    def reset(self):

        self._inv_AtA_I = None

    def f(self, xr):
        """The function value of this indicator function. The function value
        is 0 if the constraint is feasible and infinite otherwise.

        Parameters
        ----------
        xr : List or tuple with two elements, numpy arrays. The first element
                is x and the second is r.
        """
        if self.feasible(xr):
            return 0.0
        else:
            return np.inf

    def feasible(self, xr):
        """Feasibility of the constraint at points x and r.

        From the interface Constraint.

        Parameters
        ----------
        xr : List or tuple with two elements, numpy arrays. The first element
                is x and the second is r.
        """
        if isinstance(xr, linalgs.MultipartArray):
            xr = xr.get_parts()

        x = xr[0]

        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x

        r = xr[1]

        Ax = [0.0] * len(self.A)
        for i in xrange(len(self.A)):
            Ax[i] = self.A[i].dot(x_)
        Ax = np.vstack(Ax)

        return maths.norm(Ax - r) < consts.TOLERANCE

    def proj(self, xr):
        """The projection operator corresponding to the function.

        From the interface ProjectionOperator.

        Parameters
        ----------
        xr : List or tuple with two elements, numpy arrays. The first element
                is x and the second is r.
        """
        if isinstance(xr, linalgs.MultipartArray):
            xr = xr.get_parts()

        x = xr[0]
        p = x.shape[0]
        # The inverse of a 1000-by-1000 matrix takes roughly 1 second.
        # This is the cut-off point on my computer for where it is no more
        # feasible to compute the inverse. After this, the time to compute
        # the inverse grows very quickly.
        p_limit = 1000

        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x

        r = xr[1]

        # Save a few calls to __getitem__.
        A = self.A

        # Check feasibility.
        Ax = [0.0] * len(A)
        for i in xrange(len(A)):
            Ax[i] = A[i].dot(x_)
        Ax = np.vstack(Ax)
        if maths.norm(Ax - r) < consts.TOLERANCE:
            return xr

        # Precompute.
        if self._inv_AtA_I is None:

            AtA = A[0].T.dot(A[0])
            for i in xrange(1, len(A)):
                AtA = AtA + A[i].T.dot(A[i])

            AtA_I = AtA + sparse.eye(*AtA.shape, format=AtA.format)
            if p >= p_limit:
                # For large systems we do not invert A'A + I. We store the
                # system matrix itself in sparse diagonal form and let the
                # sparse solver handle it in each call instead.
                self._inv_AtA_I = AtA_I.todia()
            else:
                self._inv_AtA_I = np.linalg.inv(AtA_I.toarray())

        Atr = 0.0
        start = 0
        end = 0
        for i in xrange(len(A)):
            end += A[i].shape[0]
            Atr += A[i].T.dot(r[start:end])
            start = end

        if p >= p_limit:
            z = self.solver.solve(self._inv_AtA_I, Atr + x_)
        else:
            z = np.dot(self._inv_AtA_I, Atr + x_)

        Az = [0.0] * len(A)
        for i in xrange(len(A)):
            Az[i] = A[i].dot(z)
        s = np.vstack(Az)

        return linalgs.MultipartArray([z, s], vertical=True)


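# A sketch (not part of the original module): projecting a pair (x, r) onto
# the set {(x, r) : Ax = r}; the pair returned by proj is feasible.
#
#     import numpy as np
#     import scipy.sparse as sparse
#     from parsimony.functions.penalties import LinearVariableConstraint
#
#     np.random.seed(4)
#     A = [sparse.csr_matrix(np.random.randn(3, 4))]
#     x = np.random.randn(4, 1)
#     r = np.random.randn(3, 1)
#     cons = LinearVariableConstraint(A)
#     assert cons.feasible(cons.proj([x, r]))

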
class SufficientDescentCondition(properties.Function,
                                 properties.Constraint):

    def __init__(self, function, p, c=1e-4):
        """The sufficient descent condition

            f(x + a * p) <= f(x) + c * a * grad(f(x))'p.

        This condition is sometimes called the Armijo condition.

        Parameters
        ----------
        function : The function to minimise. Must expose the methods f(x)
                and grad(x).

        p : Numpy array. The descent direction.

        c : Float, 0 < c < 1. A constant for the condition. Should be small.
        """
        self.function = function
        self.p = p
        self.c = c

    def f(self, x, a):

        return self.function.f(x + a * self.p)

    def feasible(self, xa):
        """Feasibility of the constraint at point x with step a.

        From the interface "Constraint".
        """
        x = xa[0]
        a = xa[1]

        f_x_ap = self.function.f(x + a * self.p)
        f_x = self.function.f(x)
        grad_p = np.dot(self.function.grad(x).T, self.p)[0, 0]
#        print "f_x_ap = %.10f, f_x = %.10f, grad_p = %.10f, feas = %.10f" \
#                % (f_x_ap, f_x, grad_p, f_x + self.c * a * grad_p)
#        if grad_p >= 0.0:
#            pass
        feasible = f_x_ap <= f_x + self.c * a * grad_p

        return feasible


#class WolfeCondition(Function, Constraint):
#
#    def __init__(self, function, p, c1=1e-4, c2=0.9):
#        """
#        Parameters:
#        ----------
#        c1 : Float. 0 < c1 < c2 < 1. A constant for the condition. Should be
#                small.
#        c2 : Float. 0 < c1 < c2 < 1. A constant for the condition. Depends
#                on the minimisation algorithms. For Newton or quasi-Newton
#                descent directions, 0.9 is a good choice. 0.1 is appropriate
#                for nonlinear conjugate gradient.
#        """
#        self.function = function
#        self.p = p
#        self.c1 = c1
#        self.c2 = c2
#
#    def f(self, x, a):
#
#        return self.function.f(x + a * self.p)
#
#    """Feasibility of the constraint at point x.
#
#    From the interface "Constraint".
#    """
#    def feasible(self, x, a):
#
#        grad_p = np.dot(self.function.grad(x).T, self.p)[0, 0]
#        cond1 = self.function.f(x + a * self.p) \
#            <= self.function.f(x) + self.c1 * a * grad_p
#        cond2 = np.dot(self.function.grad(x + a * self.p).T, self.p)[0, 0] \
#            >= self.c2 * grad_p
#
#        return cond1 and cond2
#
#
#class StrongWolfeCondition(Function, Constraint):
#
#    def __init__(self, function, p, c1=1e-4, c2=0.9):
#        """
#        Parameters:
#        ----------
#        c1 : Float. 0 < c1 < c2 < 1. A constant for the condition. Should be
#                small.
#        c2 : Float. 0 < c1 < c2 < 1. A constant for the condition. Depends
#                on the minimisation algorithms. For Newton or quasi-Newton
#                descent directions, 0.9 is a good choice. 0.1 is appropriate
#                for nonlinear conjugate gradient.
#        """
#        self.function = function
#        self.p = p
#        self.c1 = c1
#        self.c2 = c2
#
#    def f(self, x, a):
#
#        return self.function.f(x + a * self.p)
#
#    """Feasibility of the constraint at point x.
#
#    From the interface "Constraint".
#    """
#    def feasible(self, x, a):
#
#        grad_p = np.dot(self.function.grad(x).T, self.p)[0, 0]
#        cond1 = self.function.f(x + a * self.p) \
#            <= self.function.f(x) + self.c1 * a * grad_p
#        grad_x_ap = self.function.grad(x + a * self.p)
#        cond2 = abs(np.dot(grad_x_ap.T, self.p)[0, 0]) \
#            <= self.c2 * abs(grad_p)
#
#        return cond1 and cond2


if __name__ == "__main__":
    import doctest
    doctest.testmod()