
Source Code for Module parsimony.functions.multiblock.losses

   1  # -*- coding: utf-8 -*- 
   2  """ 
   3  The :mod:`parsimony.functions.multiblock.losses` module contains multiblock 
   4  loss functions. 
   5   
   6  Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved. 
   7   
   8  Created on Tue Feb  4 08:51:43 2014 
   9   
  10  @author:  Tommy Löfstedt 
  11  @email:   lofstedt.tommy@gmail.com 
  12  @license: BSD 3-clause. 
  13  """ 
  14  import abc 
  15  import numbers 
  16   
  17  import numpy as np 
  18   
  19  import parsimony.utils as utils 
  20  import parsimony.utils.maths as maths 
  21  import parsimony.functions.properties as properties 
  22  import parsimony.utils.consts as consts 
  23  import properties as mb_properties 
  24   
  25  __all__ = ["CombinedMultiblockFunction", 
  26             "MultiblockFunctionWrapper", "MultiblockNesterovFunctionWrapper", 
  27             "LatentVariableCovariance"] 
  28   
  29   
30 -class CombinedMultiblockFunction(mb_properties.MultiblockFunction, 31 mb_properties.MultiblockGradient, 32 mb_properties.MultiblockProximalOperator, 33 mb_properties.MultiblockProjectionOperator, 34 # mb_properties.MultiblockContinuation, 35 mb_properties.MultiblockStepSize):
36 """Combines one or more loss functions, any number of penalties and zero 37 or one proximal operator. 38 39 This function thus represents 40 41 f(x) = g_1(x) [ + g_2(x) ... ] [ + d_1(x) ... ] [ + N_1(x) ...] 42 [ + h_1(x) ...], 43 44 subject to [ C_1(x) <= c_1, 45 C_2(x) <= c_2, 46 ... ], 47 48 where g_i are differentiable Functions that may be multiblock, d_j are 49 differentiable penalties, h_k are a ProximalOperators and N_l are 50 NesterovFunctions. All functions and penalties must thus have Gradients, 51 unless they are ProximalOperators. 52 53 If no ProximalOperator is given, prox will be the identity. 54 55 Parameters 56 ---------- 57 X : List of numpy arrays. The blocks of data in the multiblock model. 58 59 functions : List of lists of lists. A function matrix, with element 60 i,j connecting block i to block j. 61 62 penalties : A list of lists. Element i of the outer list is also a list 63 that contains the penalties for block i. 64 65 prox : A list of lists. Element i of the outer list is also a list that 66 contains the penalties that can be expressed as proximal operators 67 for block i. 68 69 constraints : A list of lists. Element i of the outer list is also a list 70 that contains the constraints for block i. 71 """
72 - def __init__(self, X, functions=[], penalties=[], prox=[], constraints=[]):
73 74 self.K = len(X) 75 self.X = X 76 77 if len(functions) != self.K: 78 self._f = [0] * self.K 79 for i in xrange(self.K): 80 self._f[i] = [0] * self.K 81 for j in xrange(self.K): 82 self._f[i][j] = list() 83 else: 84 self._f = functions 85 86 if len(penalties) != self.K: 87 self._p = [0] * self.K 88 self._N = [0] * self.K 89 for i in xrange(self.K): 90 self._p[i] = list() 91 self._N[i] = list() 92 else: 93 self._p = [0] * self.K 94 self._N = [0] * self.K 95 for i in xrange(self.K): 96 self._p[i] = list() 97 self._N[i] = list() 98 for di in penalties[i]: 99 if isinstance(di, properties.NesterovFunction): 100 self._N[i].append(di) 101 else: 102 self._p[i].append(di) 103 104 if len(prox) != self.K: 105 self._prox = [0] * self.K 106 for i in xrange(self.K): 107 self._prox[i] = list() 108 else: 109 self._prox = prox 110 111 if len(constraints) != self.K: 112 self._c = [0] * self.K 113 for i in xrange(self.K): 114 self._c[i] = list() 115 else: 116 self._c = constraints
117
118 - def reset(self):
119 120 for fi in self._f: 121 for fij in fi: 122 for fijk in fij: 123 fijk.reset() 124 125 for pi in self._p: 126 for pik in pi: 127 pik.reset() 128 129 for Ni in self._N: 130 for Nik in Ni: 131 Nik.reset() 132 133 for proxi in self._prox: 134 for proxik in proxi: 135 proxik.reset() 136 137 for ci in self._c: 138 for cik in ci: 139 cik.reset()
140
141 - def add_function(self, function, i, j):
142 """Add a function that connects blocks i and j. 143 144 Parameters 145 ---------- 146 function : Function or MultiblockFunction. A function that connects 147 block i and block j. 148 149 i : Non-negative integer. Index of the first block. Zero based, so 0 150 is the first block. 151 152 j : Non-negative integer. Index of the second block. Zero based, so 0 153 is the first block. 154 """ 155 if not isinstance(function, properties.Gradient): 156 if not isinstance(function, mb_properties.MultiblockGradient): 157 raise ValueError("Functions must have gradients.") 158 159 self._f[i][j].append(function)
160
161 - def add_penalty(self, penalty, i):
162 163 if not isinstance(penalty, properties.Penalty): 164 raise ValueError("Not a penalty.") 165 166 if isinstance(penalty, properties.NesterovFunction): 167 self._N[i].append(penalty) 168 else: 169 if isinstance(penalty, properties.Gradient): 170 self._p[i].append(penalty) 171 else: 172 if isinstance(penalty, properties.ProximalOperator): 173 self._prox[i].append(penalty) 174 else: 175 raise ValueError("Non-smooth and no proximal operator.")
176 177 # @utils.deprecated("add_penalty")
178 - def add_prox(self, penalty, i):
179 180 if not isinstance(penalty, properties.ProximalOperator): 181 raise ValueError("Not a proximal operator.") 182 183 self._prox[i].append(penalty)
184
185 - def add_constraint(self, constraint, i):
186 187 if not isinstance(constraint, properties.Constraint): 188 raise ValueError("Not a constraint.") 189 if not isinstance(constraint, properties.ProjectionOperator): 190 raise ValueError("Constraints must have projection operators.") 191 192 self._c[i].append(constraint)
193
194 - def has_nesterov_function(self, index):
195 196 return len(self._N[index]) > 0
197
198 - def f(self, w):
199 """Function value. 200 201 Parameters 202 ---------- 203 w : List of numpy arrays. The weight vectors. 204 """ 205 val = 0.0 206 207 for i in xrange(len(self._f)): 208 fi = self._f[i] 209 for j in xrange(len(fi)): 210 fij = self._f[i][j] 211 for k in xrange(len(fij)): 212 val += fij[k].f([w[i], w[j]]) 213 214 for i in xrange(len(self._p)): 215 pi = self._p[i] 216 for k in xrange(len(pi)): 217 val += pi[k].f(w[i]) 218 219 for i in xrange(len(self._N)): 220 Ni = self._N[i] 221 for k in xrange(len(Ni)): 222 val += Ni[k].f(w[i]) 223 224 for i in xrange(len(self._prox)): 225 proxi = self._prox[i] 226 for k in xrange(len(proxi)): 227 val += proxi[k].f(w[i]) 228 229 return val
230
231 - def fmu(self, w):
232 """Function value of smoothed function. 233 234 Parameters 235 ---------- 236 w : List of numpy arrays. The weight vectors. 237 """ 238 val = 0.0 239 240 for i in xrange(len(self._f)): 241 fi = self._f[i] 242 for j in xrange(len(fi)): 243 fij = self._f[i][j] 244 for k in xrange(len(fij)): 245 val += fij[k].f([w[i], w[j]]) 246 247 for i in xrange(len(self._p)): 248 pi = self._p[i] 249 for k in xrange(len(pi)): 250 val += pi[k].f(w[i]) 251 252 for i in xrange(len(self._N)): 253 Ni = self._N[i] 254 for k in xrange(len(Ni)): 255 val += Ni[k].fmu(w[i]) 256 257 for i in xrange(len(self._prox)): 258 proxi = self._prox[i] 259 for k in xrange(len(proxi)): 260 val += proxi[k].f(w[i]) 261 262 return val
263
264 - def grad(self, w, index):
265 """Gradient of the differentiable part of the function. 266 267 From the interface "MultiblockGradient". 268 269 Parameters 270 ---------- 271 w : List of numpy arrays. The weight vectors, w[index] is the point at 272 which to evaluate the gradient. 273 274 index : Non-negative integer. Which variable the step is for. 275 """ 276 grad = np.zeros(w[index].shape) 277 278 # Add gradients from the loss functions. 279 fi = self._f[index] 280 for j in xrange(len(fi)): 281 fij = fi[j] 282 for k in xrange(len(fij)): 283 fijk = fij[k] 284 if isinstance(fijk, properties.Gradient): 285 grad += fijk.grad(w[index]) 286 elif isinstance(fijk, mb_properties.MultiblockGradient): 287 grad += fijk.grad([w[index], w[j]], 0) 288 289 for i in xrange(len(self._f)): 290 fij = self._f[i][index] 291 if i != index: # Do not count these twice. 292 for k in xrange(len(fij)): 293 fijk = fij[k] 294 if isinstance(fijk, properties.Gradient): 295 # We shouldn't do anything here, right? This means e.g. 296 # that this (block i) is the y of a logistic regression. 297 pass 298 # grad += fij.grad(w[i]) 299 elif isinstance(fijk, mb_properties.MultiblockGradient): 300 grad += fijk.grad([w[i], w[index]], 1) 301 302 # Add gradients from the penalties. 303 pi = self._p[index] 304 for k in xrange(len(pi)): 305 grad += pi[k].grad(w[index]) 306 307 Ni = self._N[index] 308 for k in xrange(len(Ni)): 309 grad += Ni[k].grad(w[index]) 310 311 return grad
312
313 - def prox(self, w, index, factor=1.0, eps=consts.TOLERANCE, max_iter=100):
314 """The proximal operator of the non-differentiable part of the 315 function with the given index. 316 317 From the interface "MultiblockProximalOperator". 318 319 Parameters 320 ---------- 321 w : List of numpy arrays. The weight vectors. 322 323 index : Non-negative integer. Which variable the step is for. 324 325 factor : Positive float. A factor by which the Lagrange multiplier is 326 scaled. This is usually the step size. 327 """ 328 prox = self._prox[index] 329 proj = self._c[index] 330 331 if len(prox) == 0 and len(proj) == 0: 332 prox_w = w[index] 333 334 elif len(prox) == 1 and len(proj) == 0: 335 prox_w = prox[0].prox(w[index]) 336 337 elif len(prox) == 0 and (len(proj) == 1 or len(proj) == 2): 338 prox_w = self.proj(w, index, eps=eps, max_iter=max_iter) 339 340 else: 341 from parsimony.algorithms.proximal \ 342 import ParallelDykstrasProximalAlgorithm 343 combo = ParallelDykstrasProximalAlgorithm(output=False, 344 eps=eps, 345 max_iter=max_iter, 346 min_iter=1) 347 prox_w = combo.run(w[index], prox=prox, proj=proj, factor=factor) 348 349 return prox_w
350
351 - def proj(self, w, index, eps=consts.TOLERANCE, max_iter=100):
352 """The projection operator of a constraint that corresponds to the 353 function with the given index. 354 355 From the interface "MultiblockProjectionOperator". 356 357 Parameters 358 ---------- 359 w : List of numpy arrays. The weight vectors. 360 361 index : Non-negative integer. Which variable the step is for. 362 """ 363 prox = self._prox[index] 364 proj = self._c[index] 365 if len(proj) == 1 and len(prox) == 0: 366 proj_w = proj[0].proj(w[index], eps=eps, max_iter=max_iter) 367 368 elif len(proj) == 2 and len(prox) == 0: 369 from parsimony.algorithms.proximal \ 370 import DykstrasProjectionAlgorithm 371 combo = DykstrasProjectionAlgorithm(eps=eps, 372 max_iter=max_iter, min_iter=1) 373 374 proj_w = combo.run(proj, w[index]) 375 376 else: 377 proj_w = self.prox(w, index, eps=eps, max_iter=max_iter) 378 379 return proj_w
380
381 - def step(self, w, index):
382 """The step size to use in descent methods. 383 384 From the interface "StepSize". 385 386 Parameters 387 ---------- 388 w : Numpy array. The point at which to determine the step size. 389 390 index : Non-negative integer. The variable which the step is for. 391 """ 392 all_lipschitz = True 393 L = 0.0 394 395 # Add Lipschitz constants from the loss functions. 396 fi = self._f[index] 397 for j in xrange(len(fi)): 398 fij = fi[j] 399 for k in xrange(len(fij)): 400 fijk = fij[k] 401 if isinstance(fijk, properties.Gradient): 402 if not isinstance(fijk, 403 properties.LipschitzContinuousGradient): 404 all_lipschitz = False 405 break 406 else: 407 L += fijk.L(w[index]) 408 elif isinstance(fijk, mb_properties.MultiblockGradient): 409 if not isinstance(fijk, 410 mb_properties.MultiblockLipschitzContinuousGradient): 411 all_lipschitz = False 412 break 413 else: 414 L += fijk.L([w[index], w[j]], 0) 415 416 if not all_lipschitz: 417 break 418 419 for i in xrange(len(self._f)): 420 fij = self._f[i][index] 421 if i != index: # Do not visit these twice. 422 for k in xrange(len(fij)): 423 fijk = fij[k] 424 if isinstance(fijk, properties.Gradient): 425 # We shouldn't do anything here, right? This means that 426 # this (block i) is e.g. the y in a logistic 427 # regression. 428 pass 429 elif isinstance(fijk, mb_properties.MultiblockGradient): 430 if not isinstance(fijk, 431 mb_properties.MultiblockLipschitzContinuousGradient): 432 all_lipschitz = False 433 break 434 else: 435 L += fijk.L([w[i], w[index]], 1) 436 437 # Add Lipschitz constants from the penalties. 438 pi = self._p[index] 439 for k in xrange(len(pi)): 440 if not isinstance(pi[k], properties.LipschitzContinuousGradient): 441 all_lipschitz = False 442 break 443 else: 444 L += pi[k].L() # w[index]) 445 446 Ni = self._N[index] 447 for k in xrange(len(Ni)): 448 if not isinstance(Ni[k], properties.LipschitzContinuousGradient): 449 all_lipschitz = False 450 break 451 else: 452 L += Ni[k].L() # w[index]) 453 454 step = 0.0 455 if all_lipschitz and L >= consts.TOLERANCE: 456 step = 1.0 / L 457 else: 458 # If all functions did not have Lipschitz continuous gradients, 459 # try to find the step size through backtracking line search. 460 class F(properties.Function, 461 properties.Gradient): 462 463 def __init__(self, func, w, index): 464 self.func = func 465 self.w = w 466 self.index = index
467 468 def f(self, x): 469 470 # Temporarily replace the index:th variable with x. 471 w_old = self.w[self.index] 472 self.w[self.index] = x 473 f = self.func.f(self.w) 474 self.w[self.index] = w_old 475 476 return f
477 478 def grad(self, x): 479 480 # Temporarily replace the index:th variable with x. 481 w_old = self.w[self.index] 482 self.w[self.index] = x 483 g = self.func.grad(self.w, self.index) 484 self.w[self.index] = w_old 485 486 return g 487 488 func = F(self, w, index) 489 p = -self.grad(w, index) 490 491 from parsimony.algorithms.utils import BacktrackingLineSearch 492 import parsimony.functions.penalties as penalties 493 line_search = BacktrackingLineSearch( 494 condition=penalties.SufficientDescentCondition, max_iter=30) 495 a = np.sqrt(1.0 / self.X[index].shape[1]) # Arbitrarily "small". 496 step = line_search.run(func, w[index], p, rho=0.5, a=a, 497 condition_params={"c": 1e-4}) 498 499 return step 500 501
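# A minimal usage sketch of CombinedMultiblockFunction (assuming
# parsimony.functions.penalties.L1 is available and takes its regularisation
# constant as the keyword ``l``; LatentVariableCovariance is defined further
# down in this module):
#
#     import numpy as np
#     import parsimony.functions.penalties as penalties
#
#     np.random.seed(42)
#     X = [np.random.rand(10, 5), np.random.rand(10, 3)]  # Two data blocks.
#     w = [np.random.rand(5, 1), np.random.rand(3, 1)]    # One weight vector per block.
#
#     function = CombinedMultiblockFunction(X)
#     function.add_function(LatentVariableCovariance(X), 0, 1)  # Loss connecting blocks 0 and 1.
#     function.add_penalty(penalties.L1(l=0.1), 0)  # L1 has a prox, so it is stored with the proximal operators.
#     function.add_penalty(penalties.L1(l=0.1), 1)
#
#     val = function.f(w)         # Loss plus penalties at w.
#     grad = function.grad(w, 0)  # Gradient of the smooth part with respect to block 0.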
502 -class MultiblockFunctionWrapper(properties.CompositeFunction, 503 properties.Gradient, 504 properties.StepSize, 505 properties.ProximalOperator):
506
507 - def __init__(self, function, w, index):
508 self.function = function 509 self.w = w 510 self.index = index
511
512 - def f(self, w):
513 """Function value. 514 515 From the interface "Function". 516 517 Parameters 518 ---------- 519 w : Numpy array (p-by-1). The point at which to evaluate the function. 520 """ 521 return self.function.f(self.w[:self.index] + \ 522 [w] + \ 523 self.w[self.index + 1:])
524
525 - def grad(self, w):
526 """Gradient of the function. 527 528 Parameters 529 ---------- 530 w : Numpy array (p-by-1). The point at which to evaluate the gradient. 531 """ 532 return self.function.grad(self.w[:self.index] + \ 533 [w] + \ 534 self.w[self.index + 1:], 535 self.index)
536
537 - def prox(self, w, factor=1.0, eps=consts.TOLERANCE, max_iter=100):
538 """The proximal operator corresponding to the function. 539 540 Parameters 541 ---------- 542 w : Numpy array (p-by-1). The point at which to apply the proximal 543 operator. 544 545 factor : Positive float. A factor by which the Lagrange multiplier is 546 scaled. This is usually the step size. 547 """ 548 return self.function.prox(self.w[:self.index] + \ 549 [w] + \ 550 self.w[self.index + 1:], 551 self.index, factor=factor, 552 eps=eps, max_iter=max_iter)
553
554 - def step(self, w, index=0):
555 """The step size to use in descent methods. 556 557 Parameters 558 ---------- 559 w : Numpy array. The point at which to determine the step size. 560 """ 561 return self.function.step(self.w[:self.index] + \ 562 [w] + \ 563 self.w[self.index + 1:], 564 self.index)
565 566
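# A minimal sketch of what the wrapper is for (``function`` and ``w`` below are
# hypothetical stand-ins for a CombinedMultiblockFunction and its list of
# weight vectors): the wrapper fixes every block except ``index`` so that f,
# grad, prox and step behave like an ordinary single-block function of
# w[index], which lets standard single-block solvers be applied block by block.
#
#     wrapper = MultiblockFunctionWrapper(function, w, index=0)
#     wrapper.f(w[0])     # Same value as function.f(w).
#     wrapper.grad(w[0])  # Same as function.grad(w, 0).
#     wrapper.step(w[0])  # Same as function.step(w, 0).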
567 -class MultiblockNesterovFunctionWrapper(MultiblockFunctionWrapper, 568 properties.NesterovFunction, 569 properties.Continuation):
570
571 - def __init__(self, function, w, index):
572 super(MultiblockNesterovFunctionWrapper, self).__init__(function, 573 w, 574 index)
575
576 - def get_params(self, *args):
577 578 Ni = self.function._N[self.index] 579 580 ret = dict() 581 for k in args: 582 params = [] 583 for N in Ni: 584 value = getattr(N, k) 585 params.append(value) 586 587 ret[k] = params 588 589 return ret
590
591 - def fmu(self, beta, mu=None):
592 """Returns the smoothed function value. 593 594 From the interface "NesterovFunction". 595 596 Parameters 597 ---------- 598 beta : Numpy array. A weight vector. 599 600 mu : Non-negative float. The regularisation constant for the smoothing. 601 """ 602 Ni = self.function._N[self.index] 603 f = 0.0 604 for N in Ni: 605 f += N.fmu(beta, mu=mu) 606 607 return f
608
609 - def phi(self, alpha, beta):
610 """ Function value with known alpha. 611 612 From the interface "NesterovFunction". 613 """ 614 raise NotImplementedError('Abstract method "phi" must be ' 615 'specialised!')
616
617 - def get_mu(self):
618 """Returns the regularisation constant for the smoothing. 619 620 From the interface "NesterovFunction". 621 """ 622 Ni = self.function._N[self.index] 623 if len(Ni) == 0: 624 raise ValueError("No penalties are Nesterov functions.") 625 626 return Ni[0].get_mu()
627
628 - def set_mu(self, mu):
629 """Sets the regularisation constant for the smoothing. 630 631 From the interface "NesterovFunction". 632 633 Parameters 634 ---------- 635 mu : Non-negative float. The regularisation constant for the smoothing 636 to use from now on. 637 638 Returns 639 ------- 640 old_mu : Non-negative float. The old regularisation constant for the 641 smoothing that was overwritten and no longer is used. 642 """ 643 old_mu = self.get_mu() 644 645 Ni = self.function._N[self.index] 646 for N in Ni: 647 N.set_mu(mu) 648 649 return old_mu
650
651 - def alpha(self, beta):
652 """ Dual variable of the Nesterov function. 653 654 From the interface "NesterovFunction". 655 656 Parameters 657 ---------- 658 beta : Numpy array (p-by-1). The variable for which to compute the dual 659 variable alpha. 660 """ 661 Ni = self.function._N[self.index] 662 alpha = [] 663 for N in Ni: 664 alpha += N.alpha(beta) 665 666 return alpha
667
668 - def A(self):
669 """ Linear operator of the Nesterov function. 670 671 From the interface "NesterovFunction". 672 """ 673 Ni = self.function._N[self.index] 674 A = [] 675 for N in Ni: 676 A += N.A()
677
678 - def Aa(self, alpha):
679 """ Compute A'*alpha. 680 681 From the interface "NesterovFunction". 682 683 Parameters 684 ---------- 685 alpha : Numpy array (x-by-1). The dual variable alpha. 686 """ 687 A = self.A() 688 Aa = A[0].T.dot(alpha[0]) 689 for i in xrange(1, len(A)): 690 Aa += A[i].T.dot(alpha[i]) 691 692 return Aa
693
694 - def project(self, alpha):
695 """ Projection onto the compact space of the Nesterov function. 696 697 From the interface "NesterovFunction". 698 699 Parameters 700 ---------- 701 alpha : Numpy array (x-by-1). The not-yet-projected dual variable 702 alpha. 703 """ 704 Ni = self.function._N[self.index] 705 a = [] 706 i = 0 707 for N in Ni: 708 A = N.A() 709 a += N.project(alpha[i:len(A)]) 710 i += len(A) 711 712 return a
713
714 - def M(self):
715 """ The maximum value of the regularisation of the dual variable. We 716 have 717 718 M = max_{alpha in K} 0.5*|alpha|²_2. 719 720 From the interface "NesterovFunction". 721 """ 722 Ni = self.function._N[self.index] 723 M = 0.0 724 for N in Ni: 725 M += N.M() 726 727 return M
728
729 - def estimate_mu(self, beta):
730 """ Compute a "good" value of mu with respect to the given beta. 731 732 From the interface "NesterovFunction". 733 734 Parameters 735 ---------- 736 beta : Numpy array (p-by-1). The primal variable at which to compute a 737 feasible value of mu. 738 """ 739 Ni = self.function._N[self.index] 740 mu = consts.TOLERANCE 741 for N in Ni: 742 mu = max(mu, N.estimate_mu(beta)) 743 744 return mu
745
746 - def mu_opt(self, eps):
747 """The optimal value of mu given epsilon. 748 749 Parameters 750 ---------- 751 eps : Positive float. The desired precision. 752 753 Returns 754 ------- 755 mu : Positive float. The optimal regularisation parameter. 756 757 From the interface "Continuation". 758 """ 759 raise NotImplementedError('Abstract method "mu_opt" must be ' 760 'specialised!')
761
762 - def eps_opt(self, mu):
763 """The optimal value of epsilon given mu. 764 765 Parameters 766 ---------- 767 mu : Positive float. The regularisation constant of the smoothing. 768 769 Returns 770 ------- 771 eps : Positive float. The optimal precision. 772 773 From the interface "Continuation". 774 """ 775 raise NotImplementedError('Abstract method "eps_opt" must be ' 776 'specialised!')
777
778 - def eps_max(self, mu):
779 """The maximum value of epsilon. 780 781 From the interface "Continuation". 782 783 Parameters 784 ---------- 785 mu : Positive float. The regularisation constant of the smoothing. 786 787 Returns 788 ------- 789 eps : Positive float. The upper limit, the maximum, precision. 790 """ 791 Ni = self.function._N[self.index] 792 gM = 0.0 793 for N in Ni: 794 gM += N.l * N.M() 795 796 return float(mu) * gM
797
798 - def mu_max(self, eps):
799 """The maximum value of mu. 800 801 From the interface "Continuation". 802 803 Parameters 804 ---------- 805 eps : Positive float. The maximum precision of the smoothing. 806 807 Returns 808 ------- 809 mu : Positive float. The upper limit, the maximum, of the 810 regularisation constant of the smoothing. 811 """ 812 Ni = self.function._N[self.index] 813 gM = 0.0 814 for N in Ni: 815 gM += N.l * N.M() 816 817 return float(eps) / gM
818 819
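# A small sketch of how the Nesterov wrapper aggregates the smoothing
# parameter over all Nesterov penalties of its block (``function`` and ``w``
# are hypothetical, function._N[0] is assumed non-empty, and each Nesterov
# penalty's set_mu/get_mu is assumed to simply store and return mu):
#
#     nesterov = MultiblockNesterovFunctionWrapper(function, w, index=0)
#     old_mu = nesterov.set_mu(5e-4)    # Sets mu on every Nesterov penalty of block 0, returns the old mu.
#     assert nesterov.get_mu() == 5e-4  # get_mu reads mu back from the first Nesterov penalty.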
820 -class LatentVariableCovariance(mb_properties.MultiblockFunction, 821 mb_properties.MultiblockGradient, 822 mb_properties.MultiblockLipschitzContinuousGradient):
823 #properties.Eigenvalues): 824 """Represents 825 826 Cov(X.w, Y.c) = (1 / (n - 1)) * w'.X'.Y.c, 827 828 where X.w and Y.c are latent variables. The value returned by f is the 829 negative of this covariance, so minimising this function maximises the covariance. 830 831 Parameters 832 ---------- 833 X : List with two numpy arrays. The two blocks. 834 835 unbiased : Boolean. Whether to use the unbiased (True, the default) or the 836 biased (False) sample covariance. 837 """
838 - def __init__(self, X, unbiased=True):
839 840 self.X = X 841 if unbiased: 842 self.n = float(X[0].shape[0] - 1.0) 843 else: 844 self.n = float(X[0].shape[0]) 845 846 self.reset()
847
848 - def reset(self):
849 850 self._lambda_max = None
851
852 - def f(self, w):
853 """Function value. 854 855 From the interface "Function". 856 """ 857 wX = np.dot(self.X[0], w[0]).T 858 Yc = np.dot(self.X[1], w[1]) 859 wXYc = np.dot(wX, Yc) 860 861 return -wXYc[0, 0] / self.n
862
863 - def grad(self, w, index):
864 """Gradient of the function. 865 866 From the interface "MultiblockGradient". 867 868 Parameters 869 ---------- 870 w : List of numpy arrays. The weight vectors, w[index] is the point at 871 which to evaluate the gradient. 872 873 index : Non-negative integer. Which variable the gradient is for. 874 875 Examples 876 -------- 877 >>> import numpy as np 878 >>> from parsimony.functions.multiblock.losses import LatentVariableCovariance 879 >>> 880 >>> np.random.seed(42) 881 >>> X = np.random.rand(100, 150) 882 >>> Y = np.random.rand(100, 50) 883 >>> w = np.random.rand(150, 1) 884 >>> c = np.random.rand(50, 1) 885 >>> cov = LatentVariableCovariance([X, Y]) 886 >>> grad = cov.grad([w, c], 0) 887 >>> approx_grad = cov.approx_grad([w, c], 0) 888 >>> np.allclose(grad, approx_grad) 889 True 890 """ 891 index = int(index) 892 grad = -np.dot(self.X[index].T, 893 np.dot(self.X[1 - index], w[1 - index])) \ 894 * (1.0 / self.n) 895 896 return grad
897
898 - def L(self, w, index):
899 """Lipschitz constant of the gradient with given index. 900 901 From the interface "MultiblockLipschitzContinuousGradient". 902 """ 903 # Any positive real number suffices, but a small one will give a larger 904 # step in e.g. proximal gradient descent. 905 return np.sqrt(consts.TOLERANCE) # 1.0
906 907 # def lambda_max(self): 908 # """ Largest eigenvalue of the corresponding covariance matrix. 909 # 910 # From the interface "Eigenvalues". 911 # """ 912 # # Note that we can save the state here since lmax(A) does not 913 # 914 # from algorithms import FastSVDProduct 915 # svd = FastSVDProduct() 916 # v = svd(self.X[0].T, self.X[1], max_iter=100) 917 # s = np.dot(self.X[0].T, np.dot(self.X[1], v)) 918 # 919 # return np.sum(s ** 2.0) / (self.n ** 2.0) 920 921
922 -class LatentVariableCovarianceSquared(mb_properties.MultiblockFunction, 923 mb_properties.MultiblockGradient, 924 mb_properties.MultiblockLipschitzContinuousGradient):
925 """Represents 926 927 Cov(X.w, Y.c)² = ((1 / (n - 1)) * w'.X'.Y.c)², 928 929 where X.w and Y.c are latent variables. 930 931 Parameters 932 ---------- 933 X : List with two numpy arrays. The two blocks. 934 935 unbiased : Boolean. Whether or not to use biased or unbiased sample 936 covariance. Default is True, the unbiased sample covariance is 937 used. 938 """
939 - def __init__(self, X, unbiased=True):
940 941 self.X = X 942 if unbiased: 943 self.n = float(X[0].shape[0] - 1.0) 944 else: 945 self.n = float(X[0].shape[0]) 946 947 self.reset()
948
949 - def reset(self):
950 pass
951
952 - def f(self, w):
953 """Function value. 954 955 From the interface "Function". 956 957 Parameters 958 ---------- 959 w : Numpy array (p-by-1). The point at which to evaluate the function. 960 """ 961 wX = np.dot(self.X[0], w[0]).T 962 Yc = np.dot(self.X[1], w[1]) 963 wXYc = np.dot(wX, Yc)[0, 0] 964 965 return -((wXYc / self.n) ** 2.0)
966
967 - def grad(self, w, index):
968 """Gradient of the function. 969 970 From the interface "MultiblockGradient". 971 972 Parameters 973 ---------- 974 w : List of numpy arrays. The weight vectors, w[index] is the point at 975 which to evaluate the gradient. 976 977 index : Non-negative integer. Which variable the gradient is for. 978 979 Examples 980 -------- 981 >>> import numpy as np 982 >>> from parsimony.functions.multiblock.losses import LatentVariableCovarianceSquared 983 >>> 984 >>> np.random.seed(42) 985 >>> X = np.random.rand(100, 150) 986 >>> Y = np.random.rand(100, 50) 987 >>> w = np.random.rand(150, 1) 988 >>> c = np.random.rand(50, 1) 989 >>> cov = LatentVariableCovarianceSquared([X, Y]) 990 >>> grad = cov.grad([w, c], 0) 991 >>> approx_grad = cov.approx_grad([w, c], 0) 992 >>> np.allclose(grad, approx_grad) 993 True 994 """ 995 wX = np.dot(self.X[0], w[0]).T 996 Yc = np.dot(self.X[1], w[1]) 997 wXYc = np.dot(wX, Yc)[0, 0] 998 999 index = int(index) 1000 grad = np.dot(self.X[index].T, 1001 np.dot(self.X[1 - index], w[1 - index])) \ 1002 * ((2.0 * wXYc) / (self.n * self.n)) 1003 1004 return -grad
1005
1006 - def L(self, w, index):
1007 """Lipschitz constant of the gradient with given index. 1008 1009 From the interface "MultiblockLipschitzContinuousGradient". 1010 """ 1011 index = int(index) 1012 grad = np.dot(self.X[index].T, 1013 np.dot(self.X[1 - index], w[1 - index])) \ 1014 * (1.0 / self.n) 1015 1016 return 2.0 * maths.norm(grad) ** 2.0
1017 1018
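# The two covariance losses above are related by f_squared(w) == -f_cov(w) ** 2.0,
# since LatentVariableCovariance.f returns the negative covariance and
# LatentVariableCovarianceSquared.f the negative squared covariance. A small
# sanity check (a sketch, with random data):
#
#     import numpy as np
#     np.random.seed(42)
#     X = np.random.rand(100, 150)
#     Y = np.random.rand(100, 50)
#     w = np.random.rand(150, 1)
#     c = np.random.rand(50, 1)
#     cov = LatentVariableCovariance([X, Y])
#     cov2 = LatentVariableCovarianceSquared([X, Y])
#     assert np.allclose(cov2.f([w, c]), -cov.f([w, c]) ** 2.0)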
1019 -class GeneralisedMultiblock(mb_properties.MultiblockFunction, 1020 mb_properties.MultiblockGradient, 1021 # mb_properties.MultiblockProximalOperator, 1022 mb_properties.MultiblockProjectionOperator, 1023 properties.StepSize, 1024 # LipschitzContinuousGradient, 1025 # NesterovFunction, Continuation, DualFunction 1026 ):
1027
1028 - def __init__(self, X, functions):
1029 1030 self.X = X 1031 self.functions = functions 1032 1033 self.reset()
1034
1035 - def reset(self):
1036 1037 for i in xrange(len(self.functions)): 1038 for j in xrange(len(self.functions[i])): 1039 if i == j: 1040 for k in xrange(len(self.functions[i][j])): 1041 self.functions[i][j][k].reset() 1042 else: 1043 if not self.functions[i][j] is None: 1044 self.functions[i][j].reset()
1045
1046 - def f(self, w):
1047 """Function value. 1048 """ 1049 val = 0.0 1050 for i in xrange(len(self.functions)): 1051 fi = self.functions[i] 1052 for j in xrange(len(fi)): 1053 fij = fi[j] 1054 if i == j and isinstance(fij, (list, tuple)): 1055 for k in xrange(len(fij)): 1056 # print "Diag: ", i 1057 val += fij[k].f(w[i]) 1058 else: 1059 # print "f(w[%d], w[%d])" % (i, j) 1060 if not fij is None: 1061 val += fij.f([w[i], w[j]]) 1062 1063 # TODO: Check instead if it is a numpy array. 1064 if not isinstance(val, numbers.Number): 1065 return val[0, 0] 1066 else: 1067 return val
1068
1069 - def grad(self, w, index):
1070 """Gradient of the differentiable part of the function. 1071 1072 From the interface "MultiblockGradient". 1073 """ 1074 grad = 0.0 1075 fi = self.functions[index] 1076 for j in xrange(len(fi)): 1077 fij = fi[j] 1078 if index != j: 1079 if isinstance(fij, properties.Gradient): 1080 grad += fij.grad(w[index]) 1081 elif isinstance(fij, mb_properties.MultiblockGradient): 1082 grad += fij.grad([w[index], w[j]], 0) 1083 1084 for i in xrange(len(self.functions)): 1085 fij = self.functions[i][index] 1086 if i != index: 1087 if isinstance(fij, properties.Gradient): 1088 # We shouldn't do anything here, right? This means e.g. 1089 # that this (block i) is the y of a logistic regression. 1090 pass 1091 # grad += fij.grad(w) 1092 elif isinstance(fij, mb_properties.MultiblockGradient): 1093 grad += fij.grad([w[i], w[index]], 1) 1094 1095 fii = self.functions[index][index] 1096 for k in xrange(len(fii)): 1097 if isinstance(fii[k], properties.Gradient): 1098 grad += fii[k].grad(w[index]) 1099 1100 return grad
1101 1102 # def prox(self, w, index, factor=1.0): 1103 # """The proximal operator corresponding to the function with the index. 1104 # 1105 # From the interface "MultiblockProximalOperator". 1106 # """ 1107 ## # Find a proximal operator. 1108 ## fii = self.functions[index][index] 1109 ## for k in xrange(len(fii)): 1110 ## if isinstance(fii[k], ProximalOperator): 1111 ## w[index] = fii[k].prox(w[index], factor) 1112 ## break 1113 ## # If no proximal operator was found, we will just return the same 1114 ## # vectors again. The proximal operator of the zero function returns the 1115 ## # vector itself. 1116 # 1117 # return w 1118
1119 - def proj(self, w, index):
1120 """The projection operator corresponding to the function with the 1121 index. 1122 1123 From the interface "MultiblockProjectionOperator". 1124 """ 1125 # Find a projection operators. 1126 # fii = self.functions[index][index] 1127 f = self.get_constraints(index) 1128 for k in xrange(len(f)): 1129 if isinstance(f[k], properties.ProjectionOperator): 1130 w[index] = f[k].proj(w[index]) 1131 break 1132 1133 # If no projection operator was found, we will just return the same 1134 # vectors again. 1135 return w
1136
1137 - def step(self, w, index):
1138 1139 # return 0.0001 1140 1141 all_lipschitz = True 1142 1143 # Add the Lipschitz constants. 1144 L = 0.0 1145 fi = self.functions[index] 1146 for j in xrange(len(fi)): 1147 if j != index and fi[j] is not None: 1148 fij = fi[j] 1149 if isinstance(fij, properties.LipschitzContinuousGradient): 1150 L += fij.L() 1151 elif isinstance(fij, 1152 mb_properties.MultiblockLipschitzContinuousGradient): 1153 L += fij.L(w, index) 1154 else: 1155 all_lipschitz = False 1156 break 1157 1158 if all_lipschitz: 1159 fii = self.functions[index][index] 1160 for k in xrange(len(fii)): 1161 if fii[k] is None: 1162 continue 1163 if isinstance(fii[k], properties.LipschitzContinuousGradient): 1164 L += fii[k].L() 1165 elif isinstance(fii[k], 1166 mb_properties.MultiblockLipschitzContinuousGradient): 1167 L += fii[k].L(w, index) 1168 else: 1169 all_lipschitz = False 1170 break 1171 1172 if all_lipschitz and L > 0.0: 1173 t = 1.0 / L 1174 else: 1175 # If all functions did not have Lipschitz continuous gradients, 1176 # try to find the step size through backtracking line search. 1177 class F(properties.Function, 1178 properties.Gradient): 1179 def __init__(self, func, w, index): 1180 self.func = func 1181 self.w = w 1182 self.index = index
1183 1184 def f(self, x): 1185 1186 # Temporarily replace the index:th variable with x. 1187 w_old = self.w[self.index] 1188 self.w[self.index] = x 1189 f = self.func.f(self.w) 1190 self.w[self.index] = w_old 1191 1192 return f
1193 1194 def grad(self, x): 1195 1196 # Temporarily replace the index:th variable with x. 1197 w_old = self.w[self.index] 1198 self.w[self.index] = x 1199 g = self.func.grad(self.w, self.index) 1200 self.w[self.index] = w_old 1201 1202 return g 1203 1204 func = F(self, w, index) 1205 p = -self.grad(w, index) 1206 1207 from parsimony.algorithms.utils import BacktrackingLineSearch 1208 import parsimony.functions.penalties as penalties 1209 line_search = BacktrackingLineSearch( 1210 condition=penalties.SufficientDescentCondition, max_iter=30) 1211 a = np.sqrt(1.0 / self.X[index].shape[1]) # Arbitrarily "small". 1212 t = line_search.run(func, w[index], p, rho=0.5, a=a, 1213 condition_params={"c": 1e-4}) 1214 1215 return t