# -*- coding: utf-8 -*-
"""
The :mod:`parsimony.functions.multiblock.losses` module contains multiblock
loss functions.

Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.

Created on Tue Feb 4 08:51:43 2014

@author: Tommy Löfstedt
@email: lofstedt.tommy@gmail.com
@license: BSD 3-clause.
"""
import abc
import numbers

import numpy as np

import parsimony.utils as utils
import parsimony.utils.maths as maths
import parsimony.functions.properties as properties
import parsimony.utils.consts as consts
import properties as mb_properties

__all__ = ["CombinedMultiblockFunction",
           "MultiblockFunctionWrapper", "MultiblockNesterovFunctionWrapper",
           "LatentVariableCovariance"]


class CombinedMultiblockFunction(mb_properties.MultiblockFunction,
                                 mb_properties.MultiblockGradient,
                                 mb_properties.MultiblockProximalOperator,
                                 mb_properties.MultiblockProjectionOperator,
                                 mb_properties.MultiblockStepSize):
    """Combines one or more loss functions, any number of penalties and zero
    or one proximal operator.

    This function thus represents

        f(x) = g_1(x) [ + g_2(x) ... ] [ + d_1(x) ... ] [ + N_1(x) ... ]
               [ + h_1(x) ... ],

    subject to [ C_1(x) <= c_1,
                 C_2(x) <= c_2,
                 ... ],

    where the g_i are differentiable Functions that may be multiblock, the
    d_j are differentiable penalties, the h_k are ProximalOperators and the
    N_l are NesterovFunctions. All functions and penalties must thus have
    gradients, unless they are ProximalOperators.

    If no ProximalOperator is given, prox is the identity.

    Parameters
    ----------
    X : List of numpy arrays. The blocks of data in the multiblock model.

    functions : List of lists of lists. A function matrix, with element
            i,j connecting block i to block j.

    penalties : A list of lists. Element i of the outer list is also a list
            that contains the penalties for block i.

    prox : A list of lists. Element i of the outer list is also a list that
            contains the penalties that can be expressed as proximal
            operators for block i.

    constraints : A list of lists. Element i of the outer list is also a list
            that contains the constraints for block i.
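
    Examples
    --------
    A minimal usage sketch, with a single loss function connecting two blocks
    and no penalties or constraints:

    >>> import numpy as np
    >>> from parsimony.functions.multiblock.losses import CombinedMultiblockFunction
    >>> from parsimony.functions.multiblock.losses import LatentVariableCovariance
    >>>
    >>> np.random.seed(42)
    >>> X = np.random.rand(10, 5)
    >>> Y = np.random.rand(10, 3)
    >>> w = [np.random.rand(5, 1), np.random.rand(3, 1)]
    >>> cov = LatentVariableCovariance([X, Y])
    >>> function = CombinedMultiblockFunction([X, Y])
    >>> function.add_function(cov, 0, 1)
    >>> function.f(w) == cov.f(w)
    True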
    """
    def __init__(self, X, functions=[], penalties=[], prox=[], constraints=[]):

        self.K = len(X)
        self.X = X

        if len(functions) != self.K:
            self._f = [0] * self.K
            for i in xrange(self.K):
                self._f[i] = [0] * self.K
                for j in xrange(self.K):
                    self._f[i][j] = list()
        else:
            self._f = functions

        if len(penalties) != self.K:
            self._p = [0] * self.K
            self._N = [0] * self.K
            for i in xrange(self.K):
                self._p[i] = list()
                self._N[i] = list()
        else:
            self._p = [0] * self.K
            self._N = [0] * self.K
            for i in xrange(self.K):
                self._p[i] = list()
                self._N[i] = list()
                for di in penalties[i]:
                    if isinstance(di, properties.NesterovFunction):
                        self._N[i].append(di)
                    else:
                        self._p[i].append(di)

        if len(prox) != self.K:
            self._prox = [0] * self.K
            for i in xrange(self.K):
                self._prox[i] = list()
        else:
            self._prox = prox

        if len(constraints) != self.K:
            self._c = [0] * self.K
            for i in xrange(self.K):
                self._c[i] = list()
        else:
            self._c = constraints

    def reset(self):

        for fi in self._f:
            for fij in fi:
                for fijk in fij:
                    fijk.reset()

        for pi in self._p:
            for pik in pi:
                pik.reset()

        for Ni in self._N:
            for Nik in Ni:
                Nik.reset()

        for proxi in self._prox:
            for proxik in proxi:
                proxik.reset()

        for ci in self._c:
            for cik in ci:
                cik.reset()

    def add_function(self, function, i, j):
        """Add a function that connects blocks i and j.

        Parameters
        ----------
        function : Function or MultiblockFunction. A function that connects
                block i and block j.

        i : Non-negative integer. Index of the first block. Zero-based, so 0
                is the first block.

        j : Non-negative integer. Index of the second block. Zero-based, so 0
                is the first block.
        """
        if not isinstance(function, properties.Gradient):
            if not isinstance(function, mb_properties.MultiblockGradient):
                raise ValueError("Functions must have gradients.")

        self._f[i][j].append(function)

    def add_penalty(self, penalty, i):

        if not isinstance(penalty, properties.Penalty):
            raise ValueError("Not a penalty.")

        if isinstance(penalty, properties.NesterovFunction):
            self._N[i].append(penalty)
        elif isinstance(penalty, properties.Gradient):
            self._p[i].append(penalty)
        elif isinstance(penalty, properties.ProximalOperator):
            self._prox[i].append(penalty)
        else:
            raise ValueError("Non-smooth and no proximal operator.")

    def add_prox(self, penalty, i):

        if not isinstance(penalty, properties.ProximalOperator):
            raise ValueError("Not a proximal operator.")

        self._prox[i].append(penalty)

    def add_constraint(self, constraint, i):

        if not isinstance(constraint, properties.Constraint):
            raise ValueError("Not a constraint.")
        if not isinstance(constraint, properties.ProjectionOperator):
            raise ValueError("Constraints must have projection operators.")

        self._c[i].append(constraint)

    def has_nesterov_function(self, index):

        return len(self._N[index]) > 0

    def f(self, w):
        """Function value.

        Parameters
        ----------
        w : List of numpy arrays. The weight vectors.
        """
        val = 0.0

        for i in xrange(len(self._f)):
            fi = self._f[i]
            for j in xrange(len(fi)):
                fij = self._f[i][j]
                for k in xrange(len(fij)):
                    val += fij[k].f([w[i], w[j]])

        for i in xrange(len(self._p)):
            pi = self._p[i]
            for k in xrange(len(pi)):
                val += pi[k].f(w[i])

        for i in xrange(len(self._N)):
            Ni = self._N[i]
            for k in xrange(len(Ni)):
                val += Ni[k].f(w[i])

        for i in xrange(len(self._prox)):
            proxi = self._prox[i]
            for k in xrange(len(proxi)):
                val += proxi[k].f(w[i])

        return val

    def fmu(self, w, mu=None):
        """Function value of the smoothed function.

        Parameters
        ----------
        w : List of numpy arrays. The weight vectors.

        mu : Non-negative float. The regularisation constant for the
                smoothing. If None, the current value of each Nesterov
                function is used.
        """
        val = 0.0

        for i in xrange(len(self._f)):
            fi = self._f[i]
            for j in xrange(len(fi)):
                fij = self._f[i][j]
                for k in xrange(len(fij)):
                    val += fij[k].f([w[i], w[j]])

        for i in xrange(len(self._p)):
            pi = self._p[i]
            for k in xrange(len(pi)):
                val += pi[k].f(w[i])

        for i in xrange(len(self._N)):
            Ni = self._N[i]
            for k in xrange(len(Ni)):
                val += Ni[k].fmu(w[i], mu=mu)

        for i in xrange(len(self._prox)):
            proxi = self._prox[i]
            for k in xrange(len(proxi)):
                val += proxi[k].f(w[i])

        return val

    def grad(self, w, index):
        """Gradient of the differentiable part of the function.

        From the interface "MultiblockGradient".

        Parameters
        ----------
        w : List of numpy arrays. The weight vectors, w[index] is the point
                at which to evaluate the gradient.

        index : Non-negative integer. Which variable the gradient is for.
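
        Examples
        --------
        A sanity check against the loss function's own gradient (a sketch;
        no penalties are added):

        >>> import numpy as np
        >>> from parsimony.functions.multiblock.losses import CombinedMultiblockFunction
        >>> from parsimony.functions.multiblock.losses import LatentVariableCovariance
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(10, 5)
        >>> Y = np.random.rand(10, 3)
        >>> w = [np.random.rand(5, 1), np.random.rand(3, 1)]
        >>> cov = LatentVariableCovariance([X, Y])
        >>> function = CombinedMultiblockFunction([X, Y])
        >>> function.add_function(cov, 0, 1)
        >>> np.allclose(function.grad(w, 0), cov.grad(w, 0))
        True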
        """
        grad = np.zeros(w[index].shape)

        # Add gradients from the loss functions in block `index`'s row.
        fi = self._f[index]
        for j in xrange(len(fi)):
            fij = fi[j]
            for k in xrange(len(fij)):
                fijk = fij[k]
                if isinstance(fijk, properties.Gradient):
                    grad += fijk.grad(w[index])
                elif isinstance(fijk, mb_properties.MultiblockGradient):
                    grad += fijk.grad([w[index], w[j]], 0)

        # Add gradients from the loss functions in block `index`'s column.
        for i in xrange(len(self._f)):
            fij = self._f[i][index]
            if i != index:
                for k in xrange(len(fij)):
                    fijk = fij[k]
                    if isinstance(fijk, properties.Gradient):
                        # A single-block function in this position does not
                        # depend on block `index`; nothing to add.
                        pass
                    elif isinstance(fijk, mb_properties.MultiblockGradient):
                        grad += fijk.grad([w[i], w[index]], 1)

        # Add gradients from the differentiable penalties.
        pi = self._p[index]
        for k in xrange(len(pi)):
            grad += pi[k].grad(w[index])

        # Add gradients from the smoothed Nesterov penalties.
        Ni = self._N[index]
        for k in xrange(len(Ni)):
            grad += Ni[k].grad(w[index])

        return grad

    def prox(self, w, index, factor=1.0, eps=consts.TOLERANCE, max_iter=100):
        """The proximal operator of the non-differentiable part of the
        function with the given index.

        From the interface "MultiblockProximalOperator".

        Parameters
        ----------
        w : List of numpy arrays. The weight vectors.

        index : Non-negative integer. Which variable the step is for.

        factor : Positive float. A factor by which the Lagrange multiplier is
                scaled. This is usually the step size.
        """
        prox = self._prox[index]
        proj = self._c[index]

        if len(prox) == 0 and len(proj) == 0:
            # Neither proximal operators nor projections: the identity.
            prox_w = w[index]

        elif len(prox) == 1 and len(proj) == 0:
            # Exactly one proximal operator.
            prox_w = prox[0].prox(w[index], factor=factor)

        elif len(prox) == 0 and (len(proj) == 1 or len(proj) == 2):
            # Only projections; defer to the projection operator.
            prox_w = self.proj(w, index, eps=eps, max_iter=max_iter)

        else:
            # Several proximal operators and/or projections: combine them
            # with a parallel Dykstra-type proximal algorithm.
            from parsimony.algorithms.proximal \
                import ParallelDykstrasProximalAlgorithm
            combo = ParallelDykstrasProximalAlgorithm(output=False,
                                                      eps=eps,
                                                      max_iter=max_iter,
                                                      min_iter=1)
            prox_w = combo.run(w[index], prox=prox, proj=proj, factor=factor)

        return prox_w

    def proj(self, w, index, eps=consts.TOLERANCE, max_iter=100):
        """The projection operator of a constraint that corresponds to the
        function with the given index.

        From the interface "MultiblockProjectionOperator".

        Parameters
        ----------
        w : List of numpy arrays. The weight vectors.

        index : Non-negative integer. Which variable the step is for.
        """
        prox = self._prox[index]
        proj = self._c[index]

        if len(proj) == 1 and len(prox) == 0:
            proj_w = proj[0].proj(w[index], eps=eps, max_iter=max_iter)

        elif len(proj) == 2 and len(prox) == 0:
            # Two projections: combine them with Dykstra's algorithm.
            from parsimony.algorithms.proximal \
                import DykstrasProjectionAlgorithm
            combo = DykstrasProjectionAlgorithm(eps=eps,
                                                max_iter=max_iter,
                                                min_iter=1)
            proj_w = combo.run(proj, w[index])

        else:
            # Other combinations are handled by the proximal operator.
            proj_w = self.prox(w, index, eps=eps, max_iter=max_iter)

        return proj_w

    def step(self, w, index):
        """The step size to use in descent methods.

        From the interface "StepSize".

        Parameters
        ----------
        w : List of numpy arrays. The point at which to determine the step
                size.

        index : Non-negative integer. The variable which the step is for.
        """
        all_lipschitz = True
        L = 0.0

        # Add Lipschitz constants from the loss functions.
        fi = self._f[index]
        for j in xrange(len(fi)):
            fij = fi[j]
            for k in xrange(len(fij)):
                fijk = fij[k]
                if isinstance(fijk, properties.Gradient):
                    if not isinstance(fijk,
                                      properties.LipschitzContinuousGradient):
                        all_lipschitz = False
                        break
                    else:
                        L += fijk.L(w[index])
                elif isinstance(fijk, mb_properties.MultiblockGradient):
                    if not isinstance(fijk,
                                      mb_properties.MultiblockLipschitzContinuousGradient):
                        all_lipschitz = False
                        break
                    else:
                        L += fijk.L([w[index], w[j]], 0)

            if not all_lipschitz:
                break

        for i in xrange(len(self._f)):
            fij = self._f[i][index]
            if i != index:
                for k in xrange(len(fij)):
                    fijk = fij[k]
                    if isinstance(fijk, properties.Gradient):
                        # A single-block function in this position does not
                        # depend on block `index`; nothing to add.
                        pass
                    elif isinstance(fijk, mb_properties.MultiblockGradient):
                        if not isinstance(fijk,
                                          mb_properties.MultiblockLipschitzContinuousGradient):
                            all_lipschitz = False
                            break
                        else:
                            L += fijk.L([w[i], w[index]], 1)

        # Add Lipschitz constants from the differentiable penalties.
        pi = self._p[index]
        for k in xrange(len(pi)):
            if not isinstance(pi[k], properties.LipschitzContinuousGradient):
                all_lipschitz = False
                break
            else:
                L += pi[k].L()

        # Add Lipschitz constants from the smoothed Nesterov penalties.
        Ni = self._N[index]
        for k in xrange(len(Ni)):
            if not isinstance(Ni[k], properties.LipschitzContinuousGradient):
                all_lipschitz = False
                break
            else:
                L += Ni[k].L()

        step = 0.0
        if all_lipschitz and L >= consts.TOLERANCE:
            step = 1.0 / L
        else:
            # If not all parts have Lipschitz continuous gradients, find the
            # step size through backtracking line search.

            class F(properties.Function,
                    properties.Gradient):

                def __init__(self, func, w, index):
                    self.func = func
                    self.w = w
                    self.index = index

                def f(self, x):
                    # Temporarily replace the weight vector of this block,
                    # and evaluate the whole multiblock function.
                    w_old = self.w[self.index]
                    self.w[self.index] = x
                    f = self.func.f(self.w)
                    self.w[self.index] = w_old

                    return f

                def grad(self, x):
                    # Temporarily replace the weight vector of this block,
                    # and evaluate the gradient for this block.
                    w_old = self.w[self.index]
                    self.w[self.index] = x
                    g = self.func.grad(self.w, self.index)
                    self.w[self.index] = w_old

                    return g

            func = F(self, w, index)
            p = -self.grad(w, index)

            from parsimony.algorithms.utils import BacktrackingLineSearch
            import parsimony.functions.penalties as penalties
            line_search = BacktrackingLineSearch(
                condition=penalties.SufficientDescentCondition, max_iter=30)
            a = np.sqrt(1.0 / self.X[index].shape[1])  # A rough initial step.
            step = line_search.run(func, w[index], p, rho=0.5, a=a,
                                   condition_params={"c": 1e-4})

        return step


class MultiblockFunctionWrapper(properties.CompositeFunction,
                                properties.Gradient,
                                properties.StepSize,
                                properties.ProximalOperator):
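    """Wraps a multiblock function, holding all blocks except one fixed, so
    that it can be used as an ordinary single-block function of that block.

    Parameters
    ----------
    function : MultiblockFunction. The multiblock function to wrap.

    w : List of numpy arrays. The weight vectors. All but w[index] are held
            fixed.

    index : Non-negative integer. The block with respect to which the
            wrapped function is evaluated.

    Examples
    --------
    A minimal sketch, wrapping a covariance loss in its first block:

    >>> import numpy as np
    >>> from parsimony.functions.multiblock.losses import CombinedMultiblockFunction
    >>> from parsimony.functions.multiblock.losses import LatentVariableCovariance
    >>> from parsimony.functions.multiblock.losses import MultiblockFunctionWrapper
    >>>
    >>> np.random.seed(42)
    >>> X = np.random.rand(10, 5)
    >>> Y = np.random.rand(10, 3)
    >>> w = [np.random.rand(5, 1), np.random.rand(3, 1)]
    >>> mb = CombinedMultiblockFunction([X, Y])
    >>> mb.add_function(LatentVariableCovariance([X, Y]), 0, 1)
    >>> wrapper = MultiblockFunctionWrapper(mb, w, 0)
    >>> wrapper.f(w[0]) == mb.f(w)
    True
    """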

    def __init__(self, function, w, index):
        self.function = function
        self.w = w
        self.index = index

    def f(self, w):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        w : Numpy array (p-by-1). The point at which to evaluate the
                function.
        """
        return self.function.f(self.w[:self.index] +
                               [w] +
                               self.w[self.index + 1:])

    def grad(self, w):
        """Gradient of the function.

        Parameters
        ----------
        w : Numpy array (p-by-1). The point at which to evaluate the
                gradient.
        """
        return self.function.grad(self.w[:self.index] +
                                  [w] +
                                  self.w[self.index + 1:],
                                  self.index)

    def prox(self, w, factor=1.0, eps=consts.TOLERANCE, max_iter=100):
        """The proximal operator corresponding to the function.

        Parameters
        ----------
        w : Numpy array (p-by-1). The point at which to apply the proximal
                operator.

        factor : Positive float. A factor by which the Lagrange multiplier is
                scaled. This is usually the step size.
        """
        return self.function.prox(self.w[:self.index] +
                                  [w] +
                                  self.w[self.index + 1:],
                                  self.index, factor=factor,
                                  eps=eps, max_iter=max_iter)

    def step(self, w, index=0):
        """The step size to use in descent methods.

        Parameters
        ----------
        w : Numpy array. The point at which to determine the step size.
        """
        return self.function.step(self.w[:self.index] +
                                  [w] +
                                  self.w[self.index + 1:],
                                  self.index)


class MultiblockNesterovFunctionWrapper(MultiblockFunctionWrapper,
                                        properties.NesterovFunction,
                                        properties.Continuation):

    def __init__(self, function, w, index):
        super(MultiblockNesterovFunctionWrapper, self).__init__(function,
                                                                w, index)

    def get_params(self, *args):
        Ni = self.function._N[self.index]

        ret = dict()
        for k in args:
            params = []
            for N in Ni:
                value = getattr(N, k)
                params.append(value)

            ret[k] = params

        return ret

    def fmu(self, beta, mu=None):
        """Returns the smoothed function value.

        From the interface "NesterovFunction".

        Parameters
        ----------
        beta : Numpy array. A weight vector.

        mu : Non-negative float. The regularisation constant for the
                smoothing.
        """
        Ni = self.function._N[self.index]
        f = 0.0
        for N in Ni:
            f += N.fmu(beta, mu=mu)

        return f

    def phi(self, alpha, beta):
        """Function value with known alpha.

        From the interface "NesterovFunction".
        """
        raise NotImplementedError('Abstract method "phi" must be '
                                  'specialised!')

    def get_mu(self):
        """Returns the regularisation constant for the smoothing.

        From the interface "NesterovFunction".
        """
        Ni = self.function._N[self.index]
        if len(Ni) == 0:
            raise ValueError("No penalties are Nesterov functions.")

        return Ni[0].get_mu()

    def set_mu(self, mu):
        """Sets the regularisation constant for the smoothing.

        From the interface "NesterovFunction".

        Parameters
        ----------
        mu : Non-negative float. The regularisation constant for the
                smoothing to use from now on.

        Returns
        -------
        old_mu : Non-negative float. The old regularisation constant for the
                smoothing that was overwritten and is no longer used.
        """
        old_mu = self.get_mu()

        Ni = self.function._N[self.index]
        for N in Ni:
            N.set_mu(mu)

        return old_mu

    def alpha(self, beta):
        """Dual variable of the Nesterov function.

        From the interface "NesterovFunction".

        Parameters
        ----------
        beta : Numpy array (p-by-1). The variable for which to compute the
                dual variable alpha.
        """
        Ni = self.function._N[self.index]
        alpha = []
        for N in Ni:
            alpha += N.alpha(beta)

        return alpha

    def A(self):
        """Linear operator of the Nesterov function.

        From the interface "NesterovFunction".
        """
        Ni = self.function._N[self.index]
        A = []
        for N in Ni:
            A += N.A()

        return A

    def Aa(self, alpha):
        """Computes A' * alpha.

        From the interface "NesterovFunction".

        Parameters
        ----------
        alpha : List of numpy arrays. The dual variable alpha.
        """
        A = self.A()
        Aa = A[0].T.dot(alpha[0])
        for i in xrange(1, len(A)):
            Aa += A[i].T.dot(alpha[i])

        return Aa

    def project(self, alpha):
        """Projection onto the compact space of the Nesterov function.

        From the interface "NesterovFunction".

        Parameters
        ----------
        alpha : List of numpy arrays. The not-yet-projected dual variable
                alpha.
        """
        Ni = self.function._N[self.index]
        a = []
        i = 0
        for N in Ni:
            A = N.A()
            a += N.project(alpha[i:i + len(A)])
            i += len(A)

        return a

    def M(self):
        """The maximum value of the regularisation of the dual variable. We
        have

            M = max_{alpha in K} 0.5 * |alpha|²_2.

        From the interface "NesterovFunction".
        """
        Ni = self.function._N[self.index]
        M = 0.0
        for N in Ni:
            M += N.M()

        return M

    def estimate_mu(self, beta):
        """Computes a "good" value of mu with respect to the given beta.

        From the interface "NesterovFunction".

        Parameters
        ----------
        beta : Numpy array (p-by-1). The primal variable at which to compute
                a feasible value of mu.
        """
        Ni = self.function._N[self.index]
        mu = consts.TOLERANCE
        for N in Ni:
            mu = max(mu, N.estimate_mu(beta))

        return mu

    def mu_opt(self, eps):
        """The optimal value of mu given epsilon.

        From the interface "Continuation".

        Parameters
        ----------
        eps : Positive float. The desired precision.

        Returns
        -------
        mu : Positive float. The optimal regularisation parameter.
        """
        raise NotImplementedError('Abstract method "mu_opt" must be '
                                  'specialised!')

    def eps_opt(self, mu):
        """The optimal value of epsilon given mu.

        From the interface "Continuation".

        Parameters
        ----------
        mu : Positive float. The regularisation constant of the smoothing.

        Returns
        -------
        eps : Positive float. The optimal precision.
        """
        raise NotImplementedError('Abstract method "eps_opt" must be '
                                  'specialised!')

    def eps_max(self, mu):
        """The maximum value of epsilon.

        From the interface "Continuation".

        Parameters
        ----------
        mu : Positive float. The regularisation constant of the smoothing.

        Returns
        -------
        eps : Positive float. The upper limit, the maximum, precision.
        """
        Ni = self.function._N[self.index]
        gM = 0.0
        for N in Ni:
            gM += N.l * N.M()

        return float(mu) * gM

    def mu_max(self, eps):
        """The maximum value of mu.

        From the interface "Continuation".

        Parameters
        ----------
        eps : Positive float. The maximum precision of the smoothing.

        Returns
        -------
        mu : Positive float. The upper limit, the maximum, of the
                regularisation constant of the smoothing.
        """
        Ni = self.function._N[self.index]
        gM = 0.0
        for N in Ni:
            gM += N.l * N.M()

        return float(eps) / gM


class LatentVariableCovariance(mb_properties.MultiblockFunction,
                               mb_properties.MultiblockGradient,
                               mb_properties.MultiblockLipschitzContinuousGradient):
    """Represents

        Cov(X.w, Y.c) = (1 / (n - 1)) * w'.X'.Y.c,

    where X.w and Y.c are latent variables.

    Parameters
    ----------
    X : List with two numpy arrays. The two blocks.

    unbiased : Boolean. Whether to use the biased or unbiased sample
            covariance. Default is True, i.e. the unbiased sample covariance
            is used.
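
    Examples
    --------
    A small numerical sanity check of the function value (a sketch; only
    numpy is assumed):

    >>> import numpy as np
    >>> from parsimony.functions.multiblock.losses import LatentVariableCovariance
    >>>
    >>> np.random.seed(42)
    >>> X = np.random.rand(10, 5)
    >>> Y = np.random.rand(10, 3)
    >>> w = np.random.rand(5, 1)
    >>> c = np.random.rand(3, 1)
    >>> cov = LatentVariableCovariance([X, Y])
    >>> value = -np.dot(np.dot(X, w).T, np.dot(Y, c))[0, 0] / (10 - 1)
    >>> abs(cov.f([w, c]) - value) < 5e-15
    True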
    """
    def __init__(self, X, unbiased=True):

        self.X = X
        if unbiased:
            self.n = float(X[0].shape[0] - 1.0)
        else:
            self.n = float(X[0].shape[0])

        self.reset()

    def reset(self):

        self._lambda_max = None

    def f(self, w):
        """Function value.

        From the interface "Function".
        """
        wX = np.dot(self.X[0], w[0]).T
        Yc = np.dot(self.X[1], w[1])
        wXYc = np.dot(wX, Yc)

        return -wXYc[0, 0] / self.n

    def grad(self, w, index):
        """Gradient of the function.

        From the interface "MultiblockGradient".

        Parameters
        ----------
        w : List of numpy arrays. The weight vectors, w[index] is the point
                at which to evaluate the gradient.

        index : Non-negative integer. Which variable the gradient is for.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.multiblock.losses import LatentVariableCovariance
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(100, 150)
        >>> Y = np.random.rand(100, 50)
        >>> w = np.random.rand(150, 1)
        >>> c = np.random.rand(50, 1)
        >>> cov = LatentVariableCovariance([X, Y])
        >>> grad = cov.grad([w, c], 0)
        >>> approx_grad = cov.approx_grad([w, c], 0)
        >>> np.allclose(grad, approx_grad)
        True
        """
        index = int(index)
        grad = -np.dot(self.X[index].T,
                       np.dot(self.X[1 - index], w[1 - index])) \
            * (1.0 / self.n)

        return grad

    def L(self, w, index):
        """Lipschitz constant of the gradient with given index.

        From the interface "MultiblockLipschitzContinuousGradient".
        """
        # The function is bilinear, so the gradient with respect to one block
        # is constant in that block's variable: the true Lipschitz constant
        # is zero. Return a small positive number instead, so that step sizes
        # computed as 1 / L stay finite.
        return np.sqrt(consts.TOLERANCE)


class LatentVariableCovarianceSquared(mb_properties.MultiblockFunction,
                                      mb_properties.MultiblockGradient,
                                      mb_properties.MultiblockLipschitzContinuousGradient):
    """Represents

        Cov(X.w, Y.c)² = ((1 / (n - 1)) * w'.X'.Y.c)²,

    where X.w and Y.c are latent variables.

    Parameters
    ----------
    X : List with two numpy arrays. The two blocks.

    unbiased : Boolean. Whether to use the biased or unbiased sample
            covariance. Default is True, i.e. the unbiased sample covariance
            is used.
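
    Examples
    --------
    A small sanity check (a sketch): the value equals minus the square of
    the corresponding LatentVariableCovariance value.

    >>> import numpy as np
    >>> from parsimony.functions.multiblock.losses import LatentVariableCovariance
    >>> from parsimony.functions.multiblock.losses import LatentVariableCovarianceSquared
    >>>
    >>> np.random.seed(42)
    >>> X = np.random.rand(10, 5)
    >>> Y = np.random.rand(10, 3)
    >>> w = np.random.rand(5, 1)
    >>> c = np.random.rand(3, 1)
    >>> cov = LatentVariableCovariance([X, Y])
    >>> cov2 = LatentVariableCovarianceSquared([X, Y])
    >>> abs(cov2.f([w, c]) + cov.f([w, c]) ** 2.0) < 5e-15
    True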
    """
    def __init__(self, X, unbiased=True):

        self.X = X
        if unbiased:
            self.n = float(X[0].shape[0] - 1.0)
        else:
            self.n = float(X[0].shape[0])

        self.reset()

    def reset(self):
        pass

    def f(self, w):
        """Function value.

        From the interface "Function".

        Parameters
        ----------
        w : List of numpy arrays. The weight vectors.
        """
        wX = np.dot(self.X[0], w[0]).T
        Yc = np.dot(self.X[1], w[1])
        wXYc = np.dot(wX, Yc)[0, 0]

        return -((wXYc / self.n) ** 2.0)

    def grad(self, w, index):
        """Gradient of the function.

        From the interface "MultiblockGradient".

        Parameters
        ----------
        w : List of numpy arrays. The weight vectors, w[index] is the point
                at which to evaluate the gradient.

        index : Non-negative integer. Which variable the gradient is for.

        Examples
        --------
        >>> import numpy as np
        >>> from parsimony.functions.multiblock.losses import LatentVariableCovarianceSquared
        >>>
        >>> np.random.seed(42)
        >>> X = np.random.rand(100, 150)
        >>> Y = np.random.rand(100, 50)
        >>> w = np.random.rand(150, 1)
        >>> c = np.random.rand(50, 1)
        >>> cov = LatentVariableCovarianceSquared([X, Y])
        >>> grad = cov.grad([w, c], 0)
        >>> approx_grad = cov.approx_grad([w, c], 0)
        >>> np.allclose(grad, approx_grad)
        True
        """
        wX = np.dot(self.X[0], w[0]).T
        Yc = np.dot(self.X[1], w[1])
        wXYc = np.dot(wX, Yc)[0, 0]

        index = int(index)
        grad = np.dot(self.X[index].T,
                      np.dot(self.X[1 - index], w[1 - index])) \
            * ((2.0 * wXYc) / (self.n * self.n))

        return -grad

    def L(self, w, index):
        """Lipschitz constant of the gradient with given index.

        From the interface "MultiblockLipschitzContinuousGradient".
        """
        index = int(index)
        grad = np.dot(self.X[index].T,
                      np.dot(self.X[1 - index], w[1 - index])) \
            * (1.0 / self.n)

        return 2.0 * maths.norm(grad) ** 2.0


class GeneralisedMultiblock(mb_properties.MultiblockFunction,
                            mb_properties.MultiblockGradient,
                            mb_properties.MultiblockProjectionOperator,
                            properties.StepSize):

    def __init__(self, X, functions):

        self.X = X
        self.functions = functions

        self.reset()

    def reset(self):

        for i in xrange(len(self.functions)):
            fi = self.functions[i]
            for j in xrange(len(fi)):
                fij = fi[j]
                if i == j and isinstance(fij, (list, tuple)):
                    for k in xrange(len(fij)):
                        fij[k].reset()
                elif fij is not None:
                    fij.reset()

    def f(self, w):
        """Function value.
        """
        val = 0.0
        for i in xrange(len(self.functions)):
            fi = self.functions[i]
            for j in xrange(len(fi)):
                fij = fi[j]
                if i == j and isinstance(fij, (list, tuple)):
                    # Diagonal elements hold lists of single-block functions.
                    for k in xrange(len(fij)):
                        val += fij[k].f(w[i])
                else:
                    # Off-diagonal elements hold multiblock functions.
                    if fij is not None:
                        val += fij.f([w[i], w[j]])

        if not isinstance(val, numbers.Number):
            return val[0, 0]
        else:
            return val

    def grad(self, w, index):
        """Gradient of the differentiable part of the function.

        From the interface "MultiblockGradient".
        """
        grad = 0.0
        fi = self.functions[index]
        for j in xrange(len(fi)):
            fij = fi[j]
            if index != j:
                if isinstance(fij, properties.Gradient):
                    grad += fij.grad(w[index])
                elif isinstance(fij, mb_properties.MultiblockGradient):
                    grad += fij.grad([w[index], w[j]], 0)

        for i in xrange(len(self.functions)):
            fij = self.functions[i][index]
            if i != index:
                if isinstance(fij, properties.Gradient):
                    # A single-block function in this position does not
                    # depend on block `index`; nothing to add.
                    pass
                elif isinstance(fij, mb_properties.MultiblockGradient):
                    grad += fij.grad([w[i], w[index]], 1)

        fii = self.functions[index][index]
        for k in xrange(len(fii)):
            if isinstance(fii[k], properties.Gradient):
                grad += fii[k].grad(w[index])

        return grad

    def proj(self, w, index):
        """The projection operator corresponding to the function with the
        given index.

        From the interface "MultiblockProjectionOperator".
        """
        # Find the first projection operator among this block's constraints
        # and apply it.
        f = self.get_constraints(index)
        for k in xrange(len(f)):
            if isinstance(f[k], properties.ProjectionOperator):
                w[index] = f[k].proj(w[index])
                break

        return w

    def step(self, w, index):
        """The step size to use in descent methods.
        """
        all_lipschitz = True

        L = 0.0
        fi = self.functions[index]
        for j in xrange(len(fi)):
            if j != index and fi[j] is not None:
                fij = fi[j]
                if isinstance(fij, properties.LipschitzContinuousGradient):
                    L += fij.L()
                elif isinstance(fij,
                                mb_properties.MultiblockLipschitzContinuousGradient):
                    L += fij.L(w, index)
                else:
                    all_lipschitz = False
                    break

        if all_lipschitz:
            fii = self.functions[index][index]
            for k in xrange(len(fii)):
                if fii[k] is None:
                    continue
                if isinstance(fii[k], properties.LipschitzContinuousGradient):
                    L += fii[k].L()
                elif isinstance(fii[k],
                                mb_properties.MultiblockLipschitzContinuousGradient):
                    L += fii[k].L(w, index)
                else:
                    all_lipschitz = False
                    break

        if all_lipschitz and L > 0.0:
            t = 1.0 / L
        else:
            # If not all parts have Lipschitz continuous gradients, find the
            # step size through backtracking line search.

            class F(properties.Function,
                    properties.Gradient):

                def __init__(self, func, w, index):
                    self.func = func
                    self.w = w
                    self.index = index

                def f(self, x):
                    # Temporarily replace the weight vector of this block,
                    # and evaluate the whole multiblock function.
                    w_old = self.w[self.index]
                    self.w[self.index] = x
                    f = self.func.f(self.w)
                    self.w[self.index] = w_old

                    return f

                def grad(self, x):
                    # Temporarily replace the weight vector of this block,
                    # and evaluate the gradient for this block.
                    w_old = self.w[self.index]
                    self.w[self.index] = x
                    g = self.func.grad(self.w, self.index)
                    self.w[self.index] = w_old

                    return g

            func = F(self, w, index)
            p = -self.grad(w, index)

            from parsimony.algorithms.utils import BacktrackingLineSearch
            import parsimony.functions.penalties as penalties
            line_search = BacktrackingLineSearch(
                condition=penalties.SufficientDescentCondition, max_iter=30)
            a = np.sqrt(1.0 / self.X[index].shape[1])  # A rough initial step.
            t = line_search.run(func, w[index], p, rho=0.5, a=a,
                                condition_params={"c": 1e-4})

        return t
1215