
Source Code for Module parsimony.functions.multiblock.losses

   1  # -*- coding: utf-8 -*- 
   2  """ 
   3  The :mod:`parsimony.functions.multiblock.losses` module contains multiblock 
   4  loss functions. 
   5   
   6  Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved. 
   7   
   8  Created on Tue Feb  4 08:51:43 2014 
   9   
  10  @author:  Tommy Löfstedt 
  11  @email:   lofstedt.tommy@gmail.com 
  12  @license: BSD 3-clause. 
  13  """ 
  14  import abc 
  15  import numbers 
  16   
  17  import numpy as np 
  18   
  19  import parsimony.utils as utils 
  20  import parsimony.utils.maths as maths 
  21  import parsimony.functions.properties as properties 
  22  import parsimony.utils.consts as consts 
  23  import properties as mb_properties 
  24   
  25  __all__ = ["CombinedMultiblockFunction", 
  26             "MultiblockFunctionWrapper", "MultiblockNesterovFunctionWrapper", 
  27             "LatentVariableCovariance"] 
  28   
  29   
30 -class CombinedMultiblockFunction(mb_properties.MultiblockFunction, 31 mb_properties.MultiblockGradient, 32 mb_properties.MultiblockProximalOperator, 33 mb_properties.MultiblockProjectionOperator, 34 # mb_properties.MultiblockContinuation, 35 mb_properties.MultiblockStepSize):
36 """Combines one or more loss functions, any number of penalties and zero 37 or one proximal operator. 38 39 This function thus represents 40 41 f(x) = g_1(x) [ + g_2(x) ... ] [ + d_1(x) ... ] [ + N_1(x) ...] 42 [ + h_1(x) ...], 43 44 subject to [ C_1(x) <= c_1, 45 C_2(x) <= c_2, 46 ... ], 47 48 where g_i are differentiable Functions that may be multiblock, d_j are 49 differentiable penalties, h_k are a ProximalOperators and N_l are 50 NesterovFunctions. All functions and penalties must thus have Gradients, 51 unless they are ProximalOperators. 52 53 If no ProximalOperator is given, prox will be the identity. 54 55 Parameters 56 ---------- 57 X : List of numpy arrays. The blocks of data in the multiblock model. 58 59 functions : List of lists of lists. A function matrix, with element 60 i,j connecting block i to block j. 61 62 penalties : A list of lists. Element i of the outer list is also a list 63 that contains the penalties for block i. 64 65 prox : A list of lists. Element i of the outer list is also a list that 66 contains the penalties that can be expressed as proximal operators 67 for block i. 68 69 constraints : A list of lists. Element i of the outer list is also a list 70 that contains the constraints for block i. 71 """
72 - def __init__(self, X, functions=[], penalties=[], prox=[], constraints=[]):
73 74 self.K = len(X) 75 self.X = X 76 77 if len(functions) != self.K: 78 self._f = [0] * self.K 79 for i in xrange(self.K): 80 self._f[i] = [0] * self.K 81 for j in xrange(self.K): 82 self._f[i][j] = list() 83 else: 84 self._f = functions 85 86 if len(penalties) != self.K: 87 self._p = [0] * self.K 88 self._N = [0] * self.K 89 for i in xrange(self.K): 90 self._p[i] = list() 91 self._N[i] = list() 92 else: 93 self._p = [0] * self.K 94 self._N = [0] * self.K 95 for i in xrange(self.K): 96 self._p[i] = list() 97 self._N[i] = list() 98 for di in penalties[i]: 99 if isinstance(di, properties.NesterovFunction): 100 self._N[i].append(di) 101 else: 102 self._p[i].append(di) 103 104 if len(prox) != self.K: 105 self._prox = [0] * self.K 106 for i in xrange(self.K): 107 self._prox[i] = list() 108 else: 109 self._prox = prox 110 111 if len(constraints) != self.K: 112 self._c = [0] * self.K 113 for i in xrange(self.K): 114 self._c[i] = list() 115 else: 116 self._c = constraints
117
118 - def reset(self):
119 120 for fi in self._f: 121 for fij in fi: 122 for fijk in fij: 123 fijk.reset() 124 125 for pi in self._p: 126 for pik in pi: 127 pik.reset() 128 129 for Ni in self._N: 130 for Nik in Ni: 131 Nik.reset() 132 133 for proxi in self._prox: 134 for proxik in proxi: 135 proxik.reset() 136 137 for ci in self._c: 138 for cik in ci: 139 cik.reset()
140
141 - def add_function(self, function, i, j):
142 """Add a function that connects blocks i and j. 143 144 Parameters 145 ---------- 146 function : Function or MultiblockFunction. A function that connects 147 block i and block j. 148 149 i : Non-negative integer. Index of the first block. Zero based, so 0 150 is the first block. 151 152 j : Non-negative integer. Index of the second block. Zero based, so 0 153 is the first block. 154 """ 155 if not isinstance(function, properties.Gradient): 156 if not isinstance(function, mb_properties.MultiblockGradient): 157 raise ValueError("Functions must have gradients.") 158 159 self._f[i][j].append(function)
160
161 - def add_penalty(self, penalty, i):
162 163 if not isinstance(penalty, properties.Penalty): 164 raise ValueError("Not a penalty.") 165 166 if isinstance(penalty, properties.NesterovFunction): 167 self._N[i].append(penalty) 168 else: 169 if isinstance(penalty, properties.Gradient): 170 self._p[i].append(penalty) 171 else: 172 if isinstance(penalty, properties.ProximalOperator): 173 self._prox[i].append(penalty) 174 else: 175 raise ValueError("Non-smooth and no proximal operator.")
176 177 # @utils.deprecated("add_penalty")
178 - def add_prox(self, penalty, i):
179 180 if not isinstance(penalty, properties.ProximalOperator): 181 raise ValueError("Not a proximal operator.") 182 183 self._prox[i].append(penalty)
184
185 - def add_constraint(self, constraint, i):
186 187 if not isinstance(constraint, properties.Constraint): 188 raise ValueError("Not a constraint.") 189 if not isinstance(constraint, properties.ProjectionOperator): 190 raise ValueError("Constraints must have projection operators.") 191 192 self._c[i].append(constraint)
193
194 - def has_nesterov_function(self, index):
195 196 return len(self._N[index]) > 0
197
198 - def f(self, w):
199 """Function value. 200 201 Parameters 202 ---------- 203 w : List of numpy arrays. The weight vectors. 204 """ 205 val = 0.0 206 207 for i in xrange(len(self._f)): 208 fi = self._f[i] 209 for j in xrange(len(fi)): 210 fij = self._f[i][j] 211 for k in xrange(len(fij)): 212 val += fij[k].f([w[i], w[j]]) 213 214 for i in xrange(len(self._p)): 215 pi = self._p[i] 216 for k in xrange(len(pi)): 217 val += pi[k].f(w[i]) 218 219 for i in xrange(len(self._N)): 220 Ni = self._N[i] 221 for k in xrange(len(Ni)): 222 val += Ni[k].f(w[i]) 223 224 for i in xrange(len(self._prox)): 225 proxi = self._prox[i] 226 for k in xrange(len(proxi)): 227 val += proxi[k].f(w[i]) 228 229 return val
230
231 - def fmu(self, w):
232 """Function value of smoothed function. 233 234 Parameters 235 ---------- 236 w : List of numpy arrays. The weight vectors. 237 """ 238 val = 0.0 239 240 for i in xrange(len(self._f)): 241 fi = self._f[i] 242 for j in xrange(len(fi)): 243 fij = self._f[i][j] 244 for k in xrange(len(fij)): 245 val += fij[k].f([w[i], w[j]]) 246 247 for i in xrange(len(self._p)): 248 pi = self._p[i] 249 for k in xrange(len(pi)): 250 val += pi[k].f(w[i]) 251 252 for i in xrange(len(self._N)): 253 Ni = self._N[i] 254 for k in xrange(len(Ni)): 255 val += Ni[k].fmu(w[i]) 256 257 for i in xrange(len(self._prox)): 258 proxi = self._prox[i] 259 for k in xrange(len(proxi)): 260 val += proxi[k].f(w[i]) 261 262 return val
263
264 - def grad(self, w, index):
265 """Gradient of the differentiable part of the function. 266 267 From the interface "MultiblockGradient". 268 269 Parameters 270 ---------- 271 w : List of numpy arrays. The weight vectors, w[index] is the point at 272 which to evaluate the gradient. 273 274 index : Non-negative integer. Which variable the step is for. 275 """ 276 grad = np.zeros(w[index].shape) 277 278 # Add gradients from the loss functions. 279 fi = self._f[index] 280 for j in xrange(len(fi)): 281 fij = fi[j] 282 for k in xrange(len(fij)): 283 fijk = fij[k] 284 if isinstance(fijk, properties.Gradient): 285 grad += fijk.grad(w[index]) 286 elif isinstance(fijk, mb_properties.MultiblockGradient): 287 grad += fijk.grad([w[index], w[j]], 0) 288 289 for i in xrange(len(self._f)): 290 fij = self._f[i][index] 291 if i != index: # Do not count these twice. 292 for k in xrange(len(fij)): 293 fijk = fij[k] 294 if isinstance(fijk, properties.Gradient): 295 # We shouldn't do anything here, right? This means e.g. 296 # that this (block i) is the y of a logistic regression. 297 pass 298 # grad += fij.grad(w[i]) 299 elif isinstance(fijk, mb_properties.MultiblockGradient): 300 grad += fijk.grad([w[i], w[index]], 1) 301 302 # Add gradients from the penalties. 303 pi = self._p[index] 304 for k in xrange(len(pi)): 305 grad += pi[k].grad(w[index]) 306 307 Ni = self._N[index] 308 for k in xrange(len(Ni)): 309 grad += Ni[k].grad(w[index]) 310 311 return grad
312
313 - def prox(self, w, index, factor=1.0, eps=consts.TOLERANCE, max_iter=100):
314 """The proximal operator of the non-differentiable part of the 315 function with the given index. 316 317 From the interface "MultiblockProximalOperator". 318 319 Parameters 320 ---------- 321 w : List of numpy arrays. The weight vectors. 322 323 index : Non-negative integer. Which variable the step is for. 324 325 factor : Positive float. A factor by which the Lagrange multiplier is 326 scaled. This is usually the step size. 327 """ 328 prox = self._prox[index] 329 proj = self._c[index] 330 331 if len(prox) == 0 and len(proj) == 0: 332 prox_w = w[index] 333 334 elif len(prox) == 1 and len(proj) == 0: 335 prox_w = prox[0].prox(w[index]) 336 337 elif len(prox) == 0 and (len(proj) == 1 or len(proj) == 2): 338 prox_w = self.proj(w, index, eps=eps, max_iter=max_iter) 339 340 else: 341 from parsimony.algorithms.proximal \ 342 import ParallelDykstrasProximalAlgorithm 343 combo = ParallelDykstrasProximalAlgorithm(output=False, 344 eps=eps, 345 max_iter=max_iter, 346 min_iter=1) 347 prox_w = combo.run(w[index], prox=prox, proj=proj, factor=factor) 348 349 return prox_w
350
351 - def proj(self, w, index, eps=consts.TOLERANCE, max_iter=100):
352 """The projection operator of a constraint that corresponds to the 353 function with the given index. 354 355 From the interface "MultiblockProjectionOperator". 356 357 Parameters 358 ---------- 359 w : List of numpy arrays. The weight vectors. 360 361 index : Non-negative integer. Which variable the step is for. 362 """ 363 prox = self._prox[index] 364 proj = self._c[index] 365 if len(proj) == 1 and len(prox) == 0: 366 proj_w = proj[0].proj(w[index], eps=eps, max_iter=max_iter) 367 368 elif len(proj) == 2 and len(prox) == 0: 369 from parsimony.algorithms.proximal \ 370 import DykstrasProjectionAlgorithm 371 combo = DykstrasProjectionAlgorithm(eps=eps, 372 max_iter=max_iter, min_iter=1) 373 374 proj_w = combo.run(proj, w[index]) 375 376 else: 377 proj_w = self.prox(w, index, eps=eps, max_iter=max_iter) 378 379 return proj_w
380
381 - def step(self, w, index):
382 """The step size to use in descent methods. 383 384 From the interface "StepSize". 385 386 Parameters 387 ---------- 388 w : Numpy array. The point at which to determine the step size. 389 390 index : Non-negative integer. The variable which the step is for. 391 """ 392 all_lipschitz = True 393 L = 0.0 394 395 # Add Lipschitz constants from the loss functions. 396 fi = self._f[index] 397 for j in xrange(len(fi)): 398 fij = fi[j] 399 for k in xrange(len(fij)): 400 fijk = fij[k] 401 if isinstance(fijk, properties.Gradient): 402 if not isinstance(fijk, 403 properties.LipschitzContinuousGradient): 404 all_lipschitz = False 405 break 406 else: 407 L += fijk.L(w[index]) 408 elif isinstance(fijk, mb_properties.MultiblockGradient): 409 if not isinstance(fijk, 410 mb_properties.MultiblockLipschitzContinuousGradient): 411 all_lipschitz = False 412 break 413 else: 414 L += fijk.L([w[index], w[j]], 0) 415 416 if not all_lipschitz: 417 break 418 419 for i in xrange(len(self._f)): 420 fij = self._f[i][index] 421 if i != index: # Do not visit these twice. 422 for k in xrange(len(fij)): 423 fijk = fij[k] 424 if isinstance(fijk, properties.Gradient): 425 # We shouldn't do anything here, right? This means that 426 # this (block i) is e.g. the y in a logistic 427 # regression. 428 pass 429 elif isinstance(fijk, mb_properties.MultiblockGradient): 430 if not isinstance(fijk, 431 mb_properties.MultiblockLipschitzContinuousGradient): 432 all_lipschitz = False 433 break 434 else: 435 L += fijk.L([w[i], w[index]], 1) 436 437 # Add Lipschitz constants from the penalties. 438 pi = self._p[index] 439 for k in xrange(len(pi)): 440 if not isinstance(pi[k], properties.LipschitzContinuousGradient): 441 all_lipschitz = False 442 break 443 else: 444 L += pi[k].L() # w[index]) 445 446 Ni = self._N[index] 447 for k in xrange(len(Ni)): 448 if not isinstance(Ni[k], properties.LipschitzContinuousGradient): 449 all_lipschitz = False 450 break 451 else: 452 L += Ni[k].L() # w[index]) 453 454 step = 0.0 455 if all_lipschitz and L >= consts.TOLERANCE: 456 step = 1.0 / L 457 else: 458 # If all functions did not have Lipschitz continuous gradients, 459 # try to find the step size through backtracking line search. 460 class F(properties.Function, 461 properties.Gradient): 462 463 def __init__(self, func, w, index): 464 self.func = func 465 self.w = w 466 self.index = index
467 468 def f(self, x): 469 470 # Temporarily replace the index:th variable with x. 471 w_old = self.w[self.index] 472 self.w[self.index] = x 473 f = self.func.f(self.w) 474 self.w[self.index] = w_old 475 476 return f
477 478 def grad(self, x): 479 480 # Temporarily replace the index:th variable with x. 481 w_old = self.w[self.index] 482 self.w[self.index] = x 483 g = self.func.grad(self.w, self.index) 484 self.w[self.index] = w_old 485 486 return g 487 488 func = F(self, w, index) 489 p = -self.grad(w, index) 490 491 from parsimony.algorithms.utils import BacktrackingLineSearch 492 import parsimony.functions.penalties as penalties 493 line_search = BacktrackingLineSearch( 494 condition=penalties.SufficientDescentCondition, max_iter=30) 495 a = np.sqrt(1.0 / self.X[index].shape[1]) # Arbitrarily "small". 496 step = line_search.run(func, w[index], p, rho=0.5, a=a, 497 condition_params={"c": 1e-4}) 498 499 return step 500 501
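# A minimal usage sketch of CombinedMultiblockFunction (assuming
# parsimony.functions.penalties.L1 is available and takes its regularisation
# constant as the keyword ``l``; LatentVariableCovariance is defined further
# down in this module):
#
#     import numpy as np
#     import parsimony.functions.penalties as penalties
#
#     np.random.seed(42)
#     X = [np.random.rand(10, 5), np.random.rand(10, 3)]  # Two data blocks.
#     w = [np.random.rand(5, 1), np.random.rand(3, 1)]    # One weight vector per block.
#
#     function = CombinedMultiblockFunction(X)
#     function.add_function(LatentVariableCovariance(X), 0, 1)  # Loss connecting blocks 0 and 1.
#     function.add_penalty(penalties.L1(l=0.1), 0)  # L1 has a prox, so it is stored with the proximal operators.
#     function.add_penalty(penalties.L1(l=0.1), 1)
#
#     val = function.f(w)         # Loss plus penalties at w.
#     grad = function.grad(w, 0)  # Gradient of the smooth part with respect to block 0.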
502 -class MultiblockFunctionWrapper(properties.CompositeFunction, 503 properties.Gradient, 504 properties.StepSize, 505 properties.ProximalOperator):
506
507 - def __init__(self, function, w, index):
508 self.function = function 509 self.w = w 510 self.index = index
511
512 - def f(self, w):
513 """Function value. 514 515 From the interface "Function". 516 517 Parameters 518 ---------- 519 w : Numpy array (p-by-1). The point at which to evaluate the function. 520 """ 521 return self.function.f(self.w[:self.index] + \ 522 [w] + \ 523 self.w[self.index + 1:])
524
525 - def grad(self, w):
526 """Gradient of the function. 527 528 Parameters 529 ---------- 530 w : Numpy array (p-by-1). The point at which to evaluate the gradient. 531 """ 532 return self.function.grad(self.w[:self.index] + \ 533 [w] + \ 534 self.w[self.index + 1:], 535 self.index)
536
537 - def prox(self, w, factor=1.0, eps=consts.TOLERANCE, max_iter=100):
538 """The proximal operator corresponding to the function. 539 540 Parameters 541 ---------- 542 w : Numpy array (p-by-1). The point at which to apply the proximal 543 operator. 544 545 factor : Positive float. A factor by which the Lagrange multiplier is 546 scaled. This is usually the step size. 547 """ 548 return self.function.prox(self.w[:self.index] + \ 549 [w] + \ 550 self.w[self.index + 1:], 551 self.index, factor=factor, 552 eps=eps, max_iter=max_iter)
553
554 - def step(self, w, index=0):
555 """The step size to use in descent methods. 556 557 Parameters 558 ---------- 559 w : Numpy array. The point at which to determine the step size. 560 """ 561 return self.function.step(self.w[:self.index] + \ 562 [w] + \ 563 self.w[self.index + 1:], 564 self.index)
565 566
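# A minimal sketch of what the wrapper is for (``function`` and ``w`` below are
# hypothetical stand-ins for a CombinedMultiblockFunction and its list of
# weight vectors): the wrapper fixes every block except ``index`` so that f,
# grad, prox and step behave like an ordinary single-block function of
# w[index], which lets standard single-block solvers be applied block by block.
#
#     wrapper = MultiblockFunctionWrapper(function, w, index=0)
#     wrapper.f(w[0])     # Same value as function.f(w).
#     wrapper.grad(w[0])  # Same as function.grad(w, 0).
#     wrapper.step(w[0])  # Same as function.step(w, 0).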
567 -class MultiblockNesterovFunctionWrapper(MultiblockFunctionWrapper, 568 properties.NesterovFunction, 569 properties.Continuation):
570
571 - def __init__(self, function, w, index):
572 super(MultiblockNesterovFunctionWrapper, self).__init__(function, 573 w, 574 index)
575
576 - def get_params(self, *args):
577 578 Ni = self.function._N[self.index] 579 580 ret = dict() 581 for k in args: 582 params = [] 583 for N in Ni: 584 value = getattr(N, k) 585 params.append(value) 586 587 ret[k] = params 588 589 return ret
590
591 - def fmu(self, beta, mu=None):
592 """Returns the smoothed function value. 593 594 From the interface "NesterovFunction". 595 596 Parameters 597 ---------- 598 beta : Numpy array. A weight vector. 599 600 mu : Non-negative float. The regularisation constant for the smoothing. 601 """ 602 Ni = self.function._N[self.index] 603 f = 0.0 604 for N in Ni: 605 f += N.fmu(beta, mu=mu) 606 607 return f
608
609 - def phi(self, alpha, beta):
610 """ Function value with known alpha. 611 612 From the interface "NesterovFunction". 613 """ 614 raise NotImplementedError('Abstract method "phi" must be ' 615 'specialised!')
616
617 - def get_mu(self):
618 """Returns the regularisation constant for the smoothing. 619 620 From the interface "NesterovFunction". 621 """ 622 Ni = self.function._N[self.index] 623 if len(Ni) == 0: 624 raise ValueError("No penalties are Nesterov functions.") 625 626 return Ni[0].get_mu()
627
628 - def set_mu(self, mu):
629 """Sets the regularisation constant for the smoothing. 630 631 From the interface "NesterovFunction". 632 633 Parameters 634 ---------- 635 mu : Non-negative float. The regularisation constant for the smoothing 636 to use from now on. 637 638 Returns 639 ------- 640 old_mu : Non-negative float. The old regularisation constant for the 641 smoothing that was overwritten and no longer is used. 642 """ 643 old_mu = self.get_mu() 644 645 Ni = self.function._N[self.index] 646 for N in Ni: 647 N.set_mu(mu) 648 649 return old_mu
650
651 - def alpha(self, beta):
652 """ Dual variable of the Nesterov function. 653 654 From the interface "NesterovFunction". 655 656 Parameters 657 ---------- 658 beta : Numpy array (p-by-1). The variable for which to compute the dual 659 variable alpha. 660 """ 661 Ni = self.function._N[self.index] 662 alpha = [] 663 for N in Ni: 664 alpha += N.alpha(beta) 665 666 return alpha
667
668 - def A(self):
669 """ Linear operator of the Nesterov function. 670 671 From the interface "NesterovFunction". 672 """ 673 Ni = self.function._N[self.index] 674 A = [] 675 for N in Ni: 676 A += N.A()
677
678 - def Aa(self, alpha):
679 """ Compute A'*alpha. 680 681 From the interface "NesterovFunction". 682 683 Parameters 684 ---------- 685 alpha : Numpy array (x-by-1). The dual variable alpha. 686 """ 687 A = self.A() 688 Aa = A[0].T.dot(alpha[0]) 689 for i in xrange(1, len(A)): 690 Aa += A[i].T.dot(alpha[i]) 691 692 return Aa
693
694 - def project(self, alpha):
695 """ Projection onto the compact space of the Nesterov function. 696 697 From the interface "NesterovFunction". 698 699 Parameters 700 ---------- 701 alpha : Numpy array (x-by-1). The not-yet-projected dual variable 702 alpha. 703 """ 704 Ni = self.function._N[self.index] 705 a = [] 706 i = 0 707 for N in Ni: 708 A = N.A() 709 a += N.project(alpha[i:len(A)]) 710 i += len(A) 711 712 return a
713
714 - def M(self):
715 """ The maximum value of the regularisation of the dual variable. We 716 have 717 718 M = max_{alpha in K} 0.5*|alpha|²_2. 719 720 From the interface "NesterovFunction". 721 """ 722 Ni = self.function._N[self.index] 723 M = 0.0 724 for N in Ni: 725 M += N.M() 726 727 return M
728
729 - def estimate_mu(self, beta):
730 """ Compute a "good" value of mu with respect to the given beta. 731 732 From the interface "NesterovFunction". 733 734 Parameters 735 ---------- 736 beta : Numpy array (p-by-1). The primal variable at which to compute a 737 feasible value of mu. 738 """ 739 Ni = self.function._N[self.index] 740 mu = consts.TOLERANCE 741 for N in Ni: 742 mu = max(mu, N.estimate_mu(beta)) 743 744 return mu
745
746 - def mu_opt(self, eps):
747 """The optimal value of mu given epsilon. 748 749 Parameters 750 ---------- 751 eps : Positive float. The desired precision. 752 753 Returns 754 ------- 755 mu : Positive float. The optimal regularisation parameter. 756 757 From the interface "Continuation". 758 """ 759 raise NotImplementedError('Abstract method "mu_opt" must be ' 760 'specialised!')
761
762 - def eps_opt(self, mu):
763 """The optimal value of epsilon given mu. 764 765 Parameters 766 ---------- 767 mu : Positive float. The regularisation constant of the smoothing. 768 769 Returns 770 ------- 771 eps : Positive float. The optimal precision. 772 773 From the interface "Continuation". 774 """ 775 raise NotImplementedError('Abstract method "eps_opt" must be ' 776 'specialised!')
777
778 - def eps_max(self, mu):
779 """The maximum value of epsilon. 780 781 From the interface "Continuation". 782 783 Parameters 784 ---------- 785 mu : Positive float. The regularisation constant of the smoothing. 786 787 Returns 788 ------- 789 eps : Positive float. The upper limit, the maximum, precision. 790 """ 791 Ni = self.function._N[self.index] 792 gM = 0.0 793 for N in Ni: 794 gM += N.l * N.M() 795 796 return float(mu) * gM
797
798 - def mu_max(self, eps):
799 """The maximum value of mu. 800 801 From the interface "Continuation". 802 803 Parameters 804 ---------- 805 eps : Positive float. The maximum precision of the smoothing. 806 807 Returns 808 ------- 809 mu : Positive float. The upper limit, the maximum, of the 810 regularisation constant of the smoothing. 811 """ 812 Ni = self.function._N[self.index] 813 gM = 0.0 814 for N in Ni: 815 gM += N.l * N.M() 816 817 return float(eps) / gM
818 819
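# A small sketch of how the Nesterov wrapper aggregates the smoothing
# parameter over all Nesterov penalties of its block (``function`` and ``w``
# are hypothetical, function._N[0] is assumed non-empty, and each Nesterov
# penalty's set_mu/get_mu is assumed to simply store and return mu):
#
#     nesterov = MultiblockNesterovFunctionWrapper(function, w, index=0)
#     old_mu = nesterov.set_mu(5e-4)    # Sets mu on every Nesterov penalty of block 0, returns the old mu.
#     assert nesterov.get_mu() == 5e-4  # get_mu reads mu back from the first Nesterov penalty.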
820 -class LatentVariableCovariance(mb_properties.MultiblockFunction, 821 mb_properties.MultiblockGradient, 822 mb_properties.MultiblockLipschitzContinuousGradient):
823 #properties.Eigenvalues): 824 """Represents 825 826 Cov(X.w, Y.c) = (1 / (n - 1)) * w'.X'.Y.c, 827 828 where X.w and Y.c are latent variables. The value returned by f is the 829 negative of this covariance, so minimising this function maximises the covariance. 830 831 Parameters 832 ---------- 833 X : List with two numpy arrays. The two blocks. 834 835 unbiased : Boolean. Whether to use the unbiased (True, the default) or the 836 biased (False) sample covariance. 837 """
838 - def __init__(self, X, unbiased=True):
839 840 self.X = X 841 if unbiased: 842 self.n = float(X[0].shape[0] - 1.0) 843 else: 844 self.n = float(X[0].shape[0]) 845 846 self.reset()
847
848 - def reset(self):
849 850 self._lambda_max = None
851
852 - def f(self, w):
853 """Function value. 854 855 From the interface "Function". 856 """ 857 wX = np.dot(self.X[0], w[0]).T 858 Yc = np.dot(self.X[1], w[1]) 859 wXYc = np.dot(wX, Yc) 860 861 return -wXYc[0, 0] / self.n
862
863 - def grad(self, w, index):
864 """Gradient of the function. 865 866 From the interface "MultiblockGradient". 867 868 Parameters 869 ---------- 870 w : List of numpy arrays. The weight vectors, w[index] is the point at 871 which to evaluate the gradient. 872 873 index : Non-negative integer. Which variable the gradient is for. 874 875 Examples 876 -------- 877 >>> import numpy as np 878 >>> from parsimony.functions.multiblock.losses import LatentVariableCovariance 879 >>> 880 >>> np.random.seed(42) 881 >>> X = np.random.rand(100, 150) 882 >>> Y = np.random.rand(100, 50) 883 >>> w = np.random.rand(150, 1) 884 >>> c = np.random.rand(50, 1) 885 >>> cov = LatentVariableCovariance([X, Y]) 886 >>> grad = cov.grad([w, c], 0) 887 >>> approx_grad = cov.approx_grad([w, c], 0) 888 >>> np.allclose(grad, approx_grad) 889 True 890 """ 891 index = int(index) 892 grad = -np.dot(self.X[index].T, 893 np.dot(self.X[1 - index], w[1 - index])) \ 894 * (1.0 / self.n) 895 896 return grad
897
898 - def L(self, w, index):
899 """Lipschitz constant of the gradient with given index. 900 901 From the interface "MultiblockLipschitzContinuousGradient". 902 """ 903 # Any positive real number suffices, but a small one will give a larger 904 # step in e.g. proximal gradient descent. 905 return np.sqrt(consts.TOLERANCE) # 1.0
906 907 # def lambda_max(self): 908 # """ Largest eigenvalue of the corresponding covariance matrix. 909 # 910 # From the interface "Eigenvalues". 911 # """ 912 # # Note that we can save the state here since lmax(A) does not 913 # 914 # from algorithms import FastSVDProduct 915 # svd = FastSVDProduct() 916 # v = svd(self.X[0].T, self.X[1], max_iter=100) 917 # s = np.dot(self.X[0].T, np.dot(self.X[1], v)) 918 # 919 # return np.sum(s ** 2.0) / (self.n ** 2.0) 920 921
922 -class LatentVariableCovarianceSquared(mb_properties.MultiblockFunction, 923 mb_properties.MultiblockGradient, 924 mb_properties.MultiblockLipschitzContinuousGradient):
925 """Represents 926 927 Cov(X.w, Y.c)² = ((1 / (n - 1)) * w'.X'.Y.c)², 928 929 where X.w and Y.c are latent variables. 930 931 Parameters 932 ---------- 933 X : List with two numpy arrays. The two blocks. 934 935 unbiased : Boolean. Whether or not to use biased or unbiased sample 936 covariance. Default is True, the unbiased sample covariance is 937 used. 938 """
939 - def __init__(self, X, unbiased=True):
940 941 self.X = X 942 if unbiased: 943 self.n = float(X[0].shape[0] - 1.0) 944 else: 945 self.n = float(X[0].shape[0]) 946 947 self.reset()
948
949 - def reset(self):
950 pass
951
952 - def f(self, w):
953 """Function value. 954 955 From the interface "Function". 956 957 Parameters 958 ---------- 959 w : Numpy array (p-by-1). The point at which to evaluate the function. 960 """ 961 wX = np.dot(self.X[0], w[0]).T 962 Yc = np.dot(self.X[1], w[1]) 963 wXYc = np.dot(wX, Yc)[0, 0] 964 965 return -((wXYc / self.n) ** 2.0)
966
967 - def grad(self, w, index):
968 """Gradient of the function. 969 970 From the interface "MultiblockGradient". 971 972 Parameters 973 ---------- 974 w : List of numpy arrays. The weight vectors, w[index] is the point at 975 which to evaluate the gradient. 976 977 index : Non-negative integer. Which variable the gradient is for. 978 979 Examples 980 -------- 981 >>> import numpy as np 982 >>> from parsimony.functions.multiblock.losses import LatentVariableCovarianceSquared 983 >>> 984 >>> np.random.seed(42) 985 >>> X = np.random.rand(100, 150) 986 >>> Y = np.random.rand(100, 50) 987 >>> w = np.random.rand(150, 1) 988 >>> c = np.random.rand(50, 1) 989 >>> cov = LatentVariableCovarianceSquared([X, Y]) 990 >>> grad = cov.grad([w, c], 0) 991 >>> approx_grad = cov.approx_grad([w, c], 0) 992 >>> np.allclose(grad, approx_grad) 993 True 994 """ 995 wX = np.dot(self.X[0], w[0]).T 996 Yc = np.dot(self.X[1], w[1]) 997 wXYc = np.dot(wX, Yc)[0, 0] 998 999 index = int(index) 1000 grad = np.dot(self.X[index].T, 1001 np.dot(self.X[1 - index], w[1 - index])) \ 1002 * ((2.0 * wXYc) / (self.n * self.n)) 1003 1004 return -grad
1005
1006 - def L(self, w, index):
1007 """Lipschitz constant of the gradient with given index. 1008 1009 From the interface "MultiblockLipschitzContinuousGradient". 1010 """ 1011 index = int(index) 1012 grad = np.dot(self.X[index].T, 1013 np.dot(self.X[1 - index], w[1 - index])) \ 1014 * (1.0 / self.n) 1015 1016 return 2.0 * maths.norm(grad) ** 2.0
1017 1018
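# The two covariance losses above are related by f_squared(w) == -f_cov(w) ** 2.0,
# since LatentVariableCovariance.f returns the negative covariance and
# LatentVariableCovarianceSquared.f the negative squared covariance. A small
# sanity check (a sketch, with random data):
#
#     import numpy as np
#     np.random.seed(42)
#     X = np.random.rand(100, 150)
#     Y = np.random.rand(100, 50)
#     w = np.random.rand(150, 1)
#     c = np.random.rand(50, 1)
#     cov = LatentVariableCovariance([X, Y])
#     cov2 = LatentVariableCovarianceSquared([X, Y])
#     assert np.allclose(cov2.f([w, c]), -cov.f([w, c]) ** 2.0)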
1019 -class GeneralisedMultiblock(mb_properties.MultiblockFunction, 1020 mb_properties.MultiblockGradient, 1021 # mb_properties.MultiblockProximalOperator, 1022 mb_properties.MultiblockProjectionOperator, 1023 properties.StepSize, 1024 # LipschitzContinuousGradient, 1025 # NesterovFunction, Continuation, DualFunction 1026 ):
1027
1028 - def __init__(self, X, functions):
1029 1030 self.X = X 1031 self.functions = functions 1032 1033 self.reset()
1034
1035 - def reset(self):
1036 1037 for i in xrange(len(self.functions)): 1038 for j in xrange(len(self.functions[i])): 1039 if i == j: 1040 for k in xrange(len(self.functions[i][j])): 1041 self.functions[i][j][k].reset() 1042 else: 1043 if not self.functions[i][j] is None: 1044 self.functions[i][j].reset()
1045
1046 - def f(self, w):
1047 """Function value. 1048 """ 1049 val = 0.0 1050 for i in xrange(len(self.functions)): 1051 fi = self.functions[i] 1052 for j in xrange(len(fi)): 1053 fij = fi[j] 1054 if i == j and isinstance(fij, (list, tuple)): 1055 for k in xrange(len(fij)): 1056 # print "Diag: ", i 1057 val += fij[k].f(w[i]) 1058 else: 1059 # print "f(w[%d], w[%d])" % (i, j) 1060 if not fij is None: 1061 val += fij.f([w[i], w[j]]) 1062 1063 # TODO: Check instead if it is a numpy array. 1064 if not isinstance(val, numbers.Number): 1065 return val[0, 0] 1066 else: 1067 return val
1068
1069 - def grad(self, w, index):
1070 """Gradient of the differentiable part of the function. 1071 1072 From the interface "MultiblockGradient". 1073 """ 1074 grad = 0.0 1075 fi = self.functions[index] 1076 for j in xrange(len(fi)): 1077 fij = fi[j] 1078 if index != j: 1079 if isinstance(fij, properties.Gradient): 1080 grad += fij.grad(w[index]) 1081 elif isinstance(fij, mb_properties.MultiblockGradient): 1082 grad += fij.grad([w[index], w[j]], 0) 1083 1084 for i in xrange(len(self.functions)): 1085 fij = self.functions[i][index] 1086 if i != index: 1087 if isinstance(fij, properties.Gradient): 1088 # We shouldn't do anything here, right? This means e.g. 1089 # that this (block i) is the y of a logistic regression. 1090 pass 1091 # grad += fij.grad(w) 1092 elif isinstance(fij, mb_properties.MultiblockGradient): 1093 grad += fij.grad([w[i], w[index]], 1) 1094 1095 fii = self.functions[index][index] 1096 for k in xrange(len(fii)): 1097 if isinstance(fii[k], properties.Gradient): 1098 grad += fii[k].grad(w[index]) 1099 1100 return grad
1101 1102 # def prox(self, w, index, factor=1.0): 1103 # """The proximal operator corresponding to the function with the index. 1104 # 1105 # From the interface "MultiblockProximalOperator". 1106 # """ 1107 ## # Find a proximal operator. 1108 ## fii = self.functions[index][index] 1109 ## for k in xrange(len(fii)): 1110 ## if isinstance(fii[k], ProximalOperator): 1111 ## w[index] = fii[k].prox(w[index], factor) 1112 ## break 1113 ## # If no proximal operator was found, we will just return the same 1114 ## # vectors again. The proximal operator of the zero function returns the 1115 ## # vector itself. 1116 # 1117 # return w 1118
1119 - def proj(self, w, index):
1120 """The projection operator corresponding to the function with the 1121 index. 1122 1123 From the interface "MultiblockProjectionOperator". 1124 """ 1125 # Find a projection operators. 1126 # fii = self.functions[index][index] 1127 f = self.get_constraints(index) 1128 for k in xrange(len(f)): 1129 if isinstance(f[k], properties.ProjectionOperator): 1130 w[index] = f[k].proj(w[index]) 1131 break 1132 1133 # If no projection operator was found, we will just return the same 1134 # vectors again. 1135 return w
1136
1137 - def step(self, w, index):
1138 1139 # return 0.0001 1140 1141 all_lipschitz = True 1142 1143 # Add the Lipschitz constants. 1144 L = 0.0 1145 fi = self.functions[index] 1146 for j in xrange(len(fi)): 1147 if j != index and fi[j] is not None: 1148 fij = fi[j] 1149 if isinstance(fij, properties.LipschitzContinuousGradient): 1150 L += fij.L() 1151 elif isinstance(fij, 1152 mb_properties.MultiblockLipschitzContinuousGradient): 1153 L += fij.L(w, index) 1154 else: 1155 all_lipschitz = False 1156 break 1157 1158 if all_lipschitz: 1159 fii = self.functions[index][index] 1160 for k in xrange(len(fii)): 1161 if fii[k] is None: 1162 continue 1163 if isinstance(fii[k], properties.LipschitzContinuousGradient): 1164 L += fii[k].L() 1165 elif isinstance(fii[k], 1166 mb_properties.MultiblockLipschitzContinuousGradient): 1167 L += fii[k].L(w, index) 1168 else: 1169 all_lipschitz = False 1170 break 1171 1172 if all_lipschitz and L > 0.0: 1173 t = 1.0 / L 1174 else: 1175 # If all functions did not have Lipschitz continuous gradients, 1176 # try to find the step size through backtracking line search. 1177 class F(properties.Function, 1178 properties.Gradient): 1179 def __init__(self, func, w, index): 1180 self.func = func 1181 self.w = w 1182 self.index = index
1183 1184 def f(self, x): 1185 1186 # Temporarily replace the index:th variable with x. 1187 w_old = self.w[self.index] 1188 self.w[self.index] = x 1189 f = self.func.f(self.w) 1190 self.w[self.index] = w_old 1191 1192 return f
1193 1194 def grad(self, x): 1195 1196 # Temporarily replace the index:th variable with x. 1197 w_old = self.w[self.index] 1198 self.w[self.index] = x 1199 g = self.func.grad(self.w, self.index) 1200 self.w[self.index] = w_old 1201 1202 return g 1203 1204 func = F(self, w, index) 1205 p = -self.grad(w, index) 1206 1207 from parsimony.algorithms.utils import BacktrackingLineSearch 1208 import parsimony.functions.penalties as penalties 1209 line_search = BacktrackingLineSearch( 1210 condition=penalties.SufficientDescentCondition, max_iter=30) 1211 a = np.sqrt(1.0 / self.X[index].shape[1]) # Arbitrarily "small". 1212 t = line_search.run(func, w[index], p, rho=0.5, a=a, 1213 condition_params={"c": 1e-4}) 1214 1215 return t