"""
The :mod:`parsimony.functions.objectives.functions` module contains ready-made
common combinations of loss functions and penalties that can be used right
away to analyse real data.

Created on Mon Apr 22 10:54:29 2013

Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.

@author: Tommy Löfstedt, Vincent Guillemot, Edouard Duchesnay and
         Fouad Hadj-Selem
@email: lofstedt.tommy@gmail.com, edouard.duchesnay@cea.fr
@license: BSD 3-clause.
"""
import numpy as np

from . import properties

from .nesterov.l1tv import L1TV
from .nesterov.tv import TotalVariation
from .nesterov.gl import GroupLassoOverlap
from .penalties import ZeroFunction, L1, LinearVariableConstraint
from .penalties import RidgeSquaredError
from .losses import RidgeRegression
from .losses import RidgeLogisticRegression
from .losses import LatentVariableVariance
import parsimony.utils.linalgs as linalgs
import parsimony.utils.consts as consts
import parsimony.utils.maths as maths
from parsimony.utils import deprecated

__all__ = ["CombinedFunction",
           "LinearRegressionL1L2TV", "LinearRegressionL1L2GL",
           "LogisticRegressionL1L2TV", "LogisticRegressionL1L2GL",
           "LinearRegressionL2SmoothedL1TV",
           "AugmentedLinearRegressionL1L2TV",
           "PrincipalComponentAnalysisL1TV"]
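
# Illustrative sketch only (not part of this module): how a CombinedFunction
# is typically assembled from a smooth loss, a smooth penalty and a
# proximable penalty. The LinearRegression and L2Squared classes used below
# are assumed to be available from .losses and .penalties with these
# constructors; check the installed version of parsimony before relying on
# them.
#
#     import numpy as np
#     from parsimony.functions.losses import LinearRegression
#     from parsimony.functions.penalties import L1, L2Squared
#
#     X = np.random.rand(10, 5)
#     y = np.random.rand(10, 1)
#     beta = np.random.rand(5, 1)
#
#     function = CombinedFunction()
#     function.add_loss(LinearRegression(X, y, mean=True))
#     function.add_penalty(L2Squared(l=0.1))  # differentiable penalty
#     function.add_prox(L1(l=0.1))            # proximable penalty
#
#     value = function.f(beta)  # f_1(beta) + p_1(beta) + P(beta)
#     beta = function.prox(beta - function.step(beta) * function.grad(beta))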


class CombinedFunction(properties.CompositeFunction,
                       properties.Gradient,
                       properties.ProximalOperator,
                       properties.ProjectionOperator,
                       properties.StepSize):
    """Combines one or more loss functions, any number of penalties and zero
    or one proximal operator.

    This function thus represents

        f(x) = f_1(x) [ + f_2(x) ... ] [ + p_1(x) ... ] [ + P(x)],

        subject to [ C_1(x) <= c_1,
                     C_2(x) <= c_2,
                     ... ],

    where the f_i are differentiable Functions, the p_j are differentiable
    penalties and P is a ProximalOperator. All functions and penalties must
    thus implement Gradient, unless they are ProximalOperators.

    If no ProximalOperator is given, then prox is the identity.
    """
    def __init__(self, functions=[], penalties=[], prox=[], constraints=[]):

        # NOTE: this constructor body was reconstructed from how the
        # attributes are used in the methods below.
        self._f = list(functions)
        self._p = list(penalties)
        # If no proximal operator is given, prox defaults to the identity
        # (represented by ZeroFunction).
        self._prox = list(prox) if len(prox) > 0 else [ZeroFunction()]
        self._c = list(constraints)

    def add_loss(self, function):

        if not isinstance(function, properties.Gradient):
            raise ValueError("Loss functions must have gradients.")

        self._f.append(function)

    @deprecated("add_loss")
    def add_function(self, function):

        return self.add_loss(function)

    def add_penalty(self, penalty):

        if not isinstance(penalty, properties.Penalty):
            raise ValueError("Not a penalty.")
        elif isinstance(penalty, properties.Gradient):
            self._p.append(penalty)
        elif isinstance(penalty, properties.ProximalOperator):
            if len(self._c) > 0:
                raise ValueError("Cannot have both ProximalOperator and "
                                 "ProjectionOperator.")
            else:
                self._prox[0] = penalty
        else:
            raise ValueError("The penalty is neither smooth, nor does it "
                             "have a proximal operator.")

    def add_constraint(self, constraint):

        if not isinstance(constraint, properties.Constraint):
            raise ValueError("Not a constraint.")
        elif not isinstance(constraint, properties.ProjectionOperator):
            raise ValueError("Constraints must have projection operators.")
        elif not isinstance(self._prox[0], ZeroFunction):
            raise ValueError("Cannot have both ProjectionOperator and "
                             "ProximalOperator.")
        else:
            self._c.append(constraint)

    # NOTE: the original names of this method and of add_nesterov_penalty
    # below were lost in this copy; the names used here are reconstructed
    # from what the method bodies do.
    def add_smooth_penalty(self, penalty):

        if not isinstance(penalty, properties.Penalty):
            raise ValueError("Not a penalty.")
        elif not isinstance(penalty, properties.Gradient):
            raise ValueError("The penalty is neither smooth, nor does it "
                             "have a proximal operator.")
        else:
            self._p.append(penalty)

    def add_prox(self, penalty):

        if not isinstance(penalty, properties.ProximalOperator):
            raise ValueError("Not a proximal operator.")
        elif len(self._c) > 0:
            raise ValueError("Cannot have both ProximalOperator and "
                             "ProjectionOperator.")
        else:
            self._prox[0] = penalty

    def add_nesterov_penalty(self, penalty):

        if not isinstance(penalty, properties.NesterovFunction):
            raise ValueError("Not a Nesterov function.")
        else:
            self._p.append(penalty)

    def f(self, x):
        """Function value.
        """
        val = 0.0
        for f in self._f:
            val += f.f(x)

        for p in self._p:
            val += p.f(x)

        for prox in self._prox:
            val += prox.f(x)

        return val

    def grad(self, x):
        """Gradient of the differentiable part of the function.

        From the interface "Gradient".
        """
        grad = 0.0

        for f in self._f:
            grad += f.grad(x)

        for p in self._p:
            grad += p.grad(x)

        return grad

    def prox(self, x, factor=1.0, **kwargs):
        """The proximal operator of the non-differentiable part of the
        function.

        From the interface "ProximalOperator".
        """
        return self._prox[0].prox(x, factor=factor, **kwargs)

    def proj(self, x, **kwargs):

        raise NotImplementedError("Not yet implemented.")


class LinearRegressionL1L2TV(properties.CompositeFunction,
                             properties.NesterovFunction,
                             properties.ProximalOperator,
                             properties.Continuation,
                             properties.DualFunction,
                             properties.StronglyConvex,
                             properties.StepSize):
    """Combination (sum) of LinearRegression, L1, L2 and TotalVariation.

    Parameters
    ----------
    X : Numpy array. The X matrix for the linear regression.

    y : Numpy array. The y vector for the linear regression.

    l1 : Non-negative float. The Lagrange multiplier, or regularisation
            constant, for the L1 penalty.

    l2 : Non-negative float. The Lagrange multiplier, or regularisation
            constant, for the ridge penalty.

    tv : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the smoothed TV function.

    A : Numpy array (usually sparse). The linear operator for the Nesterov
            formulation of TV. May not be None!

    mu : Non-negative float. The regularisation constant for the smoothing
            of the TV function.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.

    mean : Boolean. Whether to compute the squared loss or the mean
            squared loss. Default is True, the mean squared loss.
    """
    def __init__(self, X, y, l1, l2, tv, A=None, mu=0.0, penalty_start=0,
                 mean=True):

        self.X = X
        self.y = y

        self.rr = RidgeRegression(X, y, l2, penalty_start=penalty_start,
                                  mean=mean)
        self.l1 = L1(l1, penalty_start=penalty_start)
        self.tv = TotalVariation(tv, A=A, mu=mu, penalty_start=penalty_start)

        self.penalty_start = penalty_start
        self.mean = mean

        self.reset()

    def reset(self):

        self.rr.reset()
        self.l1.reset()
        self.tv.reset()

        self._Xty = None
        self._invXXkI = None
        self._XtinvXXtkI = None

    def get_mu(self):
        """Returns the regularisation constant for the smoothing.

        From the interface "NesterovFunction".
        """
        return self.tv.get_mu()

    def set_mu(self, mu):
        """Sets the regularisation constant for the smoothing.

        From the interface "NesterovFunction".

        Parameters
        ----------
        mu : Non-negative float. The regularisation constant for the
                smoothing to use from now on.

        Returns
        -------
        old_mu : Non-negative float. The old regularisation constant for the
                smoothing that was overwritten and is no longer used.
        """
        return self.tv.set_mu(mu)

    def f(self, beta):
        """Function value.
        """
        return self.rr.f(beta) \
            + self.l1.f(beta) \
            + self.tv.f(beta)

    def fmu(self, beta, mu=None):
        """Function value.
        """
        return self.rr.f(beta) \
            + self.l1.f(beta) \
            + self.tv.fmu(beta, mu)

    def phi(self, alpha, beta):
        """Function value with known alpha.
        """
        return self.rr.f(beta) \
            + self.l1.f(beta) \
            + self.tv.phi(alpha, beta)

    def grad(self, beta):
        """Gradient of the differentiable part of the function.

        From the interface "Gradient".
        """
        return self.rr.grad(beta) \
            + self.tv.grad(beta)

    def L(self):
        """Lipschitz constant of the gradient.

        From the interface "LipschitzContinuousGradient".
        """
        return self.rr.L() \
            + self.tv.L()

    def prox(self, beta, factor=1.0, **kwargs):
        """The proximal operator of the non-differentiable part of the
        function.

        From the interface "ProximalOperator".
        """
        return self.l1.prox(beta, factor)

    def estimate_mu(self, beta):
        """Computes a "good" value of mu with respect to the given beta.

        From the interface "NesterovFunction".
        """
        return self.tv.estimate_mu(beta)

    def M(self):
        """The maximum value of the regularisation of the dual variable. We
        have

            M = max_{alpha in K} 0.5 * |alpha|²_2.

        From the interface "NesterovFunction".
        """
        return self.tv.M()

    def mu_opt(self, eps):
        """The optimal value of mu given epsilon.

        From the interface "Continuation".
        """
        gM = self.tv.l * self.tv.M()

        # Evaluate the TV Lipschitz constant at mu = 1.
        old_mu = self.tv.set_mu(1.0)
        gA2 = self.tv.L()
        self.tv.set_mu(old_mu)

        Lg = self.rr.L()

        return (-gM * gA2 + np.sqrt((gM * gA2) ** 2.0
                + gM * Lg * gA2 * eps)) \
            / (gM * Lg)

    def eps_opt(self, mu):
        """The optimal value of epsilon given mu.

        From the interface "Continuation".
        """
        gM = self.tv.l * self.tv.M()

        # Evaluate the TV Lipschitz constant at mu = 1.
        old_mu = self.tv.set_mu(1.0)
        gA2 = self.tv.L()
        self.tv.set_mu(old_mu)

        Lg = self.rr.L()

        return (2.0 * gM * gA2 * mu
                + gM * Lg * mu ** 2.0) \
            / gA2

    def eps_max(self, mu):
        """The maximum value of epsilon.

        From the interface "Continuation".

        Parameters
        ----------
        mu : Positive float. The regularisation constant of the smoothing.

        Returns
        -------
        eps : Positive float. The upper limit, the maximum, precision.
        """
        gM = self.tv.l * self.tv.M()

        return float(mu) * gM

    def mu_max(self, eps):
        """The maximum value of mu.

        From the interface "Continuation".

        Parameters
        ----------
        eps : Positive float. The maximum precision of the smoothing.

        Returns
        -------
        mu : Positive float. The upper limit, the maximum, of the
                regularisation constant of the smoothing.
        """
        gM = self.tv.l * self.tv.M()

        return float(eps) / gM

    def gap(self, beta, beta_hat=None):
        """Compute the duality gap.

        From the interface "DualFunction".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        alpha = self.tv.alpha(beta)
        g = self.fmu(beta)

        n = float(self.X.shape[0])

        if self.mean:
            a = (np.dot(self.X, beta) - self.y) * (1.0 / n)
            f_ = (n / 2.0) * maths.norm(a) ** 2.0 + np.dot(self.y.T, a)[0, 0]
        else:
            a = np.dot(self.X, beta) - self.y
            f_ = (1.0 / 2.0) * maths.norm(a) ** 2.0 \
                + np.dot(self.y.T, a)[0, 0]

        lAta = self.tv.l * self.tv.Aa(alpha)
        if self.penalty_start > 0:
            lAta = np.vstack((np.zeros((self.penalty_start, 1)),
                              lAta))

        alpha_sqsum = 0.0
        for a_ in alpha:
            alpha_sqsum += np.sum(a_ ** 2.0)

        z = -np.dot(self.X.T, a)
        h_ = (1.0 / (2 * self.rr.k)) \
            * np.sum(maths.positive(np.abs(z - lAta) - self.l1.l) ** 2.0) \
            + (0.5 * self.tv.l * self.tv.get_mu() * alpha_sqsum)

        gap = g + f_ + h_

        return gap

    def A(self):
        """Linear operator of the Nesterov function.

        From the interface "NesterovFunction".
        """
        return self.tv.A()

    def Aa(self, alpha):
        """Computes A^T.alpha.

        From the interface "NesterovFunction".
        """
        return self.tv.Aa(alpha)

    def project(self, a):
        """Projection onto the compact space of the Nesterov function.

        From the interface "NesterovFunction".
        """
        return self.tv.project(a)

    def parameter(self):
        """Returns the strongly convex parameter for the function.

        From the interface "StronglyConvex".
        """
        return self.rr.k

    def step(self, x):
        """The step size to use in descent methods.

        From the interface "StepSize".

        Parameters
        ----------
        x : Numpy array. The point at which to evaluate the step size.
        """
        return 1.0 / self.L()
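
# Illustrative sketch only (not part of this module): a typical way to
# instantiate LinearRegressionL1L2TV. The helper used to build the TV linear
# operator is an assumption here; depending on the parsimony version it is
# called linear_operator_from_shape or A_from_shape in
# parsimony.functions.nesterov.tv.
#
#     import numpy as np
#     import parsimony.functions.nesterov.tv as tv_helper
#
#     shape = (1, 4, 4)                    # a 4-by-4 "image"
#     X = np.random.rand(20, 16)
#     y = np.random.rand(20, 1)
#     A = tv_helper.linear_operator_from_shape(shape)  # assumed helper name
#
#     function = LinearRegressionL1L2TV(X, y, l1=0.1, l2=0.5, tv=1.0,
#                                       A=A, mu=5e-4, mean=True)
#     beta = np.random.rand(16, 1)
#     beta = function.prox(beta - function.step(beta) * function.grad(beta))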


# NOTE: the class statement was truncated in this copy; the base class below
# is inferred, not original.
class LinearRegressionL1L2GL(LinearRegressionL1L2TV):
    """Combination (sum) of RidgeRegression, L1 and Overlapping Group Lasso.
    """
    def __init__(self, X, y, l1, l2, gl, A=None, mu=0.0, penalty_start=0,
                 mean=True):
        """
        Parameters
        ----------
        X : Numpy array (n-by-p). The X matrix for the linear regression.

        y : Numpy array (n-by-1). The y vector for the linear regression.

        l1 : Non-negative float. The Lagrange multiplier, or regularisation
                constant, for the L1 penalty.

        l2 : Non-negative float. The Lagrange multiplier, or regularisation
                constant, for the ridge penalty.

        gl : Non-negative float. The Lagrange multiplier, or regularisation
                constant, of the overlapping group L1-L2 function.

        A : Numpy array (usually sparse). The linear operator for the
                Nesterov formulation for group L1-L2. May not be None!

        mu : Non-negative float. The regularisation constant for the
                smoothing of the overlapping group L1-L2 function.

        penalty_start : Non-negative integer. The number of columns,
                variables etc., to exempt from penalisation. Equivalently,
                the first index to be penalised. Default is 0, all columns
                are included.

        mean : Boolean. Whether to compute the squared loss or the mean
                squared loss. Default is True, the mean squared loss.
        """
        self.X = X
        self.y = y

        self.rr = RidgeRegression(X, y, l2, penalty_start=penalty_start,
                                  mean=mean)
        self.l1 = L1(l1, penalty_start=penalty_start)
        self.gl = GroupLassoOverlap(gl, A=A, mu=mu,
                                    penalty_start=penalty_start)

        self.penalty_start = penalty_start
        self.mean = mean

        self.reset()

    def reset(self):

        self.rr.reset()
        self.l1.reset()
        self.gl.reset()

        self._Xty = None
        self._invXXkI = None
        self._XtinvXXtkI = None

    def get_mu(self):
        """Returns the regularisation constant for the smoothing.

        From the interface "NesterovFunction".
        """
        return self.gl.get_mu()

    def set_mu(self, mu):
        """Sets the regularisation constant for the smoothing.

        From the interface "NesterovFunction".

        Parameters
        ----------
        mu : Non-negative float. The regularisation constant for the
                smoothing to use from now on.

        Returns
        -------
        old_mu : Non-negative float. The old regularisation constant for the
                smoothing that was overwritten and is no longer used.
        """
        return self.gl.set_mu(mu)

    def f(self, beta):
        """Function value.
        """
        return self.rr.f(beta) \
            + self.l1.f(beta) \
            + self.gl.f(beta)

    def fmu(self, beta, mu=None):
        """Function value.
        """
        return self.rr.f(beta) \
            + self.l1.f(beta) \
            + self.gl.fmu(beta, mu)

    def phi(self, alpha, beta):
        """Function value with known alpha.
        """
        return self.rr.f(beta) \
            + self.l1.f(beta) \
            + self.gl.phi(alpha, beta)

    def grad(self, beta):
        """Gradient of the differentiable part of the function.

        From the interface "Gradient".
        """
        return self.rr.grad(beta) \
            + self.gl.grad(beta)

    def L(self):
        """Lipschitz constant of the gradient.

        From the interface "LipschitzContinuousGradient".
        """
        return self.rr.L() \
            + self.gl.L()

    def prox(self, beta, factor=1.0, **kwargs):
        """The proximal operator of the non-differentiable part of the
        function.

        From the interface "ProximalOperator".
        """
        return self.l1.prox(beta, factor, **kwargs)

    def estimate_mu(self, beta):
        """Computes a "good" value of mu with respect to the given beta.

        From the interface "NesterovFunction".
        """
        return self.gl.estimate_mu(beta)
798
800 """The maximum value of the regularisation of the dual variable. We
801 have
802
803 M = max_{alpha in K} 0.5*|alpha|²_2.
804
805 From the interface "NesterovFunction".
806 """
807 return self.gl.M()
808
810 """The optimal value of mu given epsilon.
811
812 From the interface "Continuation".
813 """
814 gM = self.gl.l * self.gl.M()
815
816
817
818 old_mu = self.gl.set_mu(1.0)
819 gA2 = self.gl.L()
820 self.gl.set_mu(old_mu)
821
822 Lg = self.rr.L()
823
824 return (-gM * gA2 + np.sqrt((gM * gA2) ** 2.0
825 + gM * Lg * gA2 * eps)) \
826 / (gM * Lg)
827

    def eps_opt(self, mu):
        """The optimal value of epsilon given mu.

        From the interface "Continuation".
        """
        gM = self.gl.l * self.gl.M()

        # Evaluate the group lasso Lipschitz constant at mu = 1.
        old_mu = self.gl.set_mu(1.0)
        gA2 = self.gl.L()
        self.gl.set_mu(old_mu)

        Lg = self.rr.L()

        return (2.0 * gM * gA2 * mu
                + gM * Lg * mu ** 2.0) \
            / gA2

    def eps_max(self, mu):
        """The maximum value of epsilon.

        From the interface "Continuation".

        Parameters
        ----------
        mu : Positive float. The regularisation constant of the smoothing.

        Returns
        -------
        eps : Positive float. The upper limit, the maximum, precision.
        """
        gM = self.gl.l * self.gl.M()

        return float(mu) * gM

    def mu_max(self, eps):
        """The maximum value of mu.

        From the interface "Continuation".

        Parameters
        ----------
        eps : Positive float. The maximum precision of the smoothing.

        Returns
        -------
        mu : Positive float. The upper limit, the maximum, of the
                regularisation constant of the smoothing.
        """
        gM = self.gl.l * self.gl.M()

        return float(eps) / gM

    def gap(self, beta, beta_hat=None):
        """Compute the duality gap.

        From the interface "DualFunction".
        """
        alpha = self.gl.alpha(beta)
        g = self.fmu(beta)

        n = float(self.X.shape[0])

        if self.mean:
            a = (np.dot(self.X, beta) - self.y) * (1.0 / n)
            f_ = (n / 2.0) * maths.norm(a) ** 2.0 + np.dot(self.y.T, a)[0, 0]
        else:
            a = np.dot(self.X, beta) - self.y
            f_ = (1.0 / 2.0) * maths.norm(a) ** 2.0 \
                + np.dot(self.y.T, a)[0, 0]

        lAta = self.gl.l * self.gl.Aa(alpha)
        if self.penalty_start > 0:
            lAta = np.vstack((np.zeros((self.penalty_start, 1)),
                              lAta))

        alpha_sqsum = 0.0
        for a_ in alpha:
            alpha_sqsum += np.sum(a_ ** 2.0)

        z = -np.dot(self.X.T, a)
        h_ = (1.0 / (2 * self.rr.k)) \
            * np.sum(maths.positive(np.abs(z - lAta) - self.l1.l) ** 2.0) \
            + (0.5 * self.gl.l * self.gl.get_mu() * alpha_sqsum)

        gap = g + f_ + h_

        return gap

    def A(self):
        """Linear operator of the Nesterov function.

        From the interface "NesterovFunction".
        """
        return self.gl.A()

    def Aa(self, alpha):
        """Computes A^T.alpha.

        From the interface "NesterovFunction".
        """
        return self.gl.Aa(alpha)

    def project(self, a):
        """Projection onto the compact space of the Nesterov function.

        From the interface "NesterovFunction".
        """
        return self.gl.project(a)

    def step(self, x):
        """The step size to use in descent methods.

        From the interface "StepSize".

        Parameters
        ----------
        x : Numpy array. The point at which to evaluate the step size.
        """
        return 1.0 / self.L()
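
# Illustrative sketch only (not part of this module): the group lasso
# counterpart of the example above. The helper used to build the group L1-L2
# linear operator is an assumption; depending on the parsimony version it is
# called linear_operator_from_groups or A_from_groups in
# parsimony.functions.nesterov.gl.
#
#     import numpy as np
#     import parsimony.functions.nesterov.gl as gl_helper
#
#     X = np.random.rand(20, 9)
#     y = np.random.rand(20, 1)
#     groups = [[0, 1, 2, 3], [3, 4, 5], [6, 7, 8]]  # overlapping groups
#     A = gl_helper.linear_operator_from_groups(9, groups)  # assumed name
#
#     function = LinearRegressionL1L2GL(X, y, l1=0.1, l2=0.5, gl=1.0,
#                                       A=A, mu=5e-4, mean=True)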


# NOTE: the class statement was truncated in this copy; the base class below
# is inferred, not original.
class LogisticRegressionL1L2TV(LinearRegressionL1L2TV):
    """Combination (sum) of RidgeLogisticRegression, L1 and TotalVariation.
    """
    def __init__(self, X, y, l1, l2, tv, A=None, mu=0.0, weights=None,
                 penalty_start=0, mean=True):
        """
        Parameters
        ----------
        X : Numpy array (n-by-p). The X matrix for the logistic regression.

        y : Numpy array (n-by-1). The y vector for the logistic regression.

        l1 : Non-negative float. The Lagrange multiplier, or regularisation
                constant, for the L1 penalty.

        l2 : Non-negative float. The Lagrange multiplier, or regularisation
                constant, for the ridge (L2) penalty.

        tv : Non-negative float. The Lagrange multiplier, or regularisation
                constant, of the smoothed TV function.

        A : Numpy array (usually sparse). The linear operator for the
                Nesterov formulation for TV. May not be None!

        mu : Non-negative float. The regularisation constant for the
                smoothing of the TV function.

        weights : List with n elements. The samples' weights.

        penalty_start : Non-negative integer. The number of columns,
                variables etc., to exempt from penalisation. Equivalently,
                the first index to be penalised. Default is 0, all columns
                are included.

        mean : Boolean. Whether to compute the loss or the mean loss.
                Default is True, the mean loss.
        """
        self.X = X
        self.y = y

        self.rr = RidgeLogisticRegression(X, y, l2,
                                          weights=weights,
                                          penalty_start=penalty_start,
                                          mean=mean)
        self.l1 = L1(l1, penalty_start=penalty_start)
        self.tv = TotalVariation(tv, A=A, mu=mu, penalty_start=penalty_start)
        if weights is None:
            weights = np.ones(y.shape)
        self.weights = weights
        self.penalty_start = penalty_start
        self.mean = mean

        self.reset()

    def gap(self, beta, beta_hat=None):
        """Compute the duality gap for the logistic function.

        From the interface "DualFunction".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        n = float(self.X.shape[0])
        alpha = self.tv.alpha(beta_)
        g = self.fmu(beta_)
        Xbeta = np.dot(self.X, beta_)
        pi = np.reciprocal(1.0 + np.exp(-Xbeta))

        scale = 1.0 / n if self.mean else 1.0

        a = (pi - self.y) * (self.weights * scale)
        b = ((1.0 / (self.weights * scale)) * a) + self.y
        f_ = np.sum((b * np.log(b) + (1 - b)
                     * np.log(1 - b)) * (self.weights * scale))

        lAta = self.tv.l * self.tv.Aa(alpha)
        if self.penalty_start > 0:
            lAta = np.vstack((np.zeros((self.penalty_start, 1)),
                              lAta))

        alpha_sqsum = 0.0
        for a_ in alpha:
            alpha_sqsum += np.sum(a_ ** 2.0)

        z = -np.dot(self.X.T, a)
        h_ = (1.0 / (2 * self.rr.k)) \
            * np.sum(maths.positive(np.abs(z - lAta) - self.l1.l) ** 2.0) \
            + (0.5 * self.tv.l * self.tv.get_mu() * alpha_sqsum)

        gap = g + f_ + h_

        return gap


# NOTE: the class statement was truncated in this copy; the base class below
# is inferred, not original.
class LogisticRegressionL1L2GL(LinearRegressionL1L2GL):
    """Combination (sum) of RidgeLogisticRegression, L1 and Overlapping Group
    Lasso.
    """
    def __init__(self, X, y, l1, l2, gl, A=None, mu=0.0, weights=None,
                 penalty_start=0, mean=True):
        """
        Parameters
        ----------
        X : Numpy array. The X matrix (n-by-p) for the logistic regression.

        y : Numpy array. The y vector for the logistic regression.

        l1 : Non-negative float. The Lagrange multiplier, or regularisation
                constant, for the L1 penalty.

        l2 : Non-negative float. The Lagrange multiplier, or regularisation
                constant, for the ridge (L2) penalty.

        gl : Non-negative float. The Lagrange multiplier, or regularisation
                constant, of the smoothed group lasso function.

        A : Numpy array (usually sparse). The linear operator for the
                Nesterov formulation for GL. May not be None!

        mu : Non-negative float. The regularisation constant for the
                smoothing of the GL function.

        weights : List with n elements. The samples' weights.

        penalty_start : Non-negative integer. The number of columns,
                variables etc., to exempt from penalisation. Equivalently,
                the first index to be penalised. Default is 0, all columns
                are included.

        mean : Boolean. Whether to compute the loss or the mean loss.
                Default is True, the mean loss.
        """
        self.X = X
        self.y = y

        self.rr = RidgeLogisticRegression(X, y, l2, weights=weights,
                                          mean=mean)
        self.l1 = L1(l1, penalty_start=penalty_start)
        self.gl = GroupLassoOverlap(gl, A=A, mu=mu,
                                    penalty_start=penalty_start)

        self.penalty_start = penalty_start
        self.mean = mean

        self.reset()


class LinearRegressionL2SmoothedL1TV(properties.CompositeFunction,
                                     properties.NesterovFunction,
                                     properties.GradientMap,
                                     properties.DualFunction,
                                     properties.StronglyConvex):
    """Combination (sum) of Linear Regression, L2 and simultaneously smoothed
    L1 and TotalVariation.

    Parameters
    ----------
    X : Numpy array. The X matrix for the ridge regression.

    y : Numpy array. The y vector for the ridge regression.

    l2 : Non-negative float. The Lagrange multiplier, or regularisation
            constant, for the ridge (L2) penalty.

    l1 : Non-negative float. The Lagrange multiplier, or regularisation
            constant, for the L1 penalty.

    tv : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the TV function.

    A : A list or tuple with 4 elements of (usually sparse) arrays. The
            linear operator for the smoothed L1+TV. The first element must be
            the linear operator for L1 and the following three for TV. May
            not be None.

    mu : Non-negative float. The regularisation constant for the smoothing.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to exempt from penalisation. Equivalently, the first index
            to be penalised. Default is 0, all columns are included.

    mean : Boolean. Whether to compute the squared loss or the mean squared
            loss. Default is True, the mean squared loss.
    """
    def __init__(self, X, y, l2, l1, tv, A=None, mu=0.0, penalty_start=0,
                 mean=True):

        if l2 < consts.TOLERANCE:
            raise ValueError("The L2 regularisation constant must be "
                             "non-zero.")

        self.X = X
        self.y = y

        self.g = RidgeRegression(X, y, l2,
                                 penalty_start=penalty_start,
                                 mean=mean)
        self.h = L1TV(l1=l1, tv=tv, A=A, mu=mu,
                      penalty_start=penalty_start)

        self.mu = float(mu)

        self.penalty_start = max(0, int(penalty_start))
        self.mean = bool(mean)

        self.reset()

    def reset(self):

        self.g.reset()
        self.h.reset()

        self._Xy = None
        self._XtinvXXtkI = None

    def get_mu(self):
        """Returns the regularisation constant for the smoothing.

        From the interface "NesterovFunction".
        """
        return self.h.get_mu()

    def set_mu(self, mu):
        """Sets the regularisation constant for the smoothing.

        From the interface "NesterovFunction".

        Parameters
        ----------
        mu : Non-negative float. The regularisation constant for the
                smoothing to use from now on.

        Returns
        -------
        old_mu : Non-negative float. The old regularisation constant for the
                smoothing that was overwritten and is no longer used.
        """
        return self.h.set_mu(mu)

    def f(self, beta):
        """Function value.
        """
        return self.g.f(beta) \
            + self.h.f(beta)

    def phi(self, alpha, beta):
        """Function value.
        """
        return self.g.f(beta) \
            + self.h.phi(alpha, beta)

    def L(self):
        """Lipschitz constant of the gradient.

        From the interface "LipschitzContinuousGradient".
        """
        b = self.parameter()
        a = self.h.lambda_max()

        return a / b

    def parameter(self):
        """Returns the strongly convex parameter for the function.

        From the interface "StronglyConvex".
        """
        return self.g.parameter()
    def V(self, alpha, beta, L):
        """The gradient map associated to the function.

        From the interface "GradientMap".
        """
        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        if L < consts.TOLERANCE:
            L = consts.TOLERANCE

        A = self.h.A()
        a = [0] * len(A)
        a[0] = (1.0 / L) * A[0].dot(beta_)
        a[1] = (1.0 / L) * A[1].dot(beta_)
        a[2] = (1.0 / L) * A[2].dot(beta_)
        a[3] = (1.0 / L) * A[3].dot(beta_)

        u_new = [0] * len(alpha)
        for i in range(len(alpha)):
            u_new[i] = alpha[i] + a[i]

        return self.h.project(u_new)

    def alpha(self, beta):
        """Dual variable of the Nesterov function.

        From the interface "NesterovFunction".
        """
        return self.h.alpha(beta)

    def betahat(self, alpha, beta=None):
        """Returns the beta that minimises the dual function.

        From the interface "DualFunction".
        """
        A = self.h.A()
        lAta = A[0].T.dot(alpha[0])
        lAta += A[1].T.dot(alpha[1])
        lAta += A[2].T.dot(alpha[2])
        lAta += A[3].T.dot(alpha[3])

        if self.penalty_start > 0:
            lAta = np.vstack((np.zeros((self.penalty_start, 1)),
                              lAta))

        n = float(self.X.shape[0])

        if self._Xy is None:
            if self.mean:
                self._Xy = np.dot(self.X.T, self.y) * (1.0 / n)
            else:
                self._Xy = np.dot(self.X.T, self.y)

        Xty_lAta = (self._Xy - lAta) * (1.0 / self.g.k)

        if self._XtinvXXtkI is None:
            XXtkI = np.dot(self.X, self.X.T)
            index = np.arange(min(XXtkI.shape))
            if self.mean:
                XXtkI[index, index] += self.g.k * n
            else:
                XXtkI[index, index] += self.g.k
            invXXtkI = np.linalg.inv(XXtkI)
            self._XtinvXXtkI = np.dot(self.X.T, invXXtkI)

        beta = (Xty_lAta - np.dot(self._XtinvXXtkI, np.dot(self.X, Xty_lAta)))

        return beta

    def gap(self, beta, beta_hat):
        """Compute the duality gap.

        From the interface "DualFunction".
        """
        raise NotImplementedError("We cannot currently do this!")

    def estimate_mu(self, beta):
        """Computes a "good" value of mu with respect to the given beta.

        From the interface "NesterovFunction".
        """
        raise NotImplementedError("We do not use this here!")

    def M(self):
        """The maximum value of the regularisation of the dual variable. We
        have

            M = max_{alpha in K} 0.5 * |alpha|²_2.

        From the interface "NesterovFunction".
        """
        return self.h.M()

    def project(self, a):
        """Projection onto the compact space of the Nesterov function.

        From the interface "NesterovFunction".
        """
        return self.h.project(a)


# NOTE: the class statement was truncated in this copy; the bases below are
# inferred from the interfaces named in the methods, not original.
class AugmentedLinearRegressionL1L2TV(properties.SplittableFunction,
                                      properties.AugmentedProximalOperator):
    """Combination (sum) of LinearRegression, L1, L2 and 1D TotalVariation
    with a linear constraint. Represents the problem

        min. f(b) = g(b)
                  + h(r)
                  = (1 / 2) ||Xb - y||² + (k / 2) ||b||²
                  + l ||r_1||_1 + g ||r_tv||_1,
        s.t. r = Db.

    The proximal operators of the splittable functions are assumed to be from
    augmented Lagrangians.

    Note: This function only works for 1-dimensional total variation.

    Parameters
    ----------
    X : Numpy array. The X matrix for the linear regression.

    y : Numpy array. The y vector for the linear regression.

    l1 : Non-negative float. The Lagrange multiplier, or regularisation
            constant, for the L1 penalty.

    l2 : Non-negative float. The Lagrange multiplier, or regularisation
            constant, for the ridge penalty.

    tv : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the total variation function.

    A : List or tuple of numpy (or usually sparse scipy) arrays. The linear
            operator for the constraints.

    rho : Positive float. The penalty parameter for the augmented Lagrangian.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to exempt from penalisation. Equivalently, the first
            index to be penalised. Default is 0, all columns are included.

    mean : Boolean. Whether to compute the squared loss or the mean
            squared loss. Default is True, the mean squared loss.
    """
    def __init__(self, X, y, l1, l2, tv, A=None, rho=1.0, penalty_start=0,
                 mean=True):

        super(AugmentedLinearRegressionL1L2TV, self).__init__(rho=rho)

        self.X = X
        self.y = y

        class MultipleFunctions(properties.Function,
                                properties.ProximalOperator):

            def __init__(self, functions, rho):
                self.funcs = functions
                self.rho = rho

            def f(self, xrr):
                if isinstance(xrr, linalgs.MultipartArray):
                    xrr = xrr.get_parts()

                f = self.funcs[0].f(xrr[0]) * self.rho \
                    + self.funcs[1].f(xrr[1]) * self.rho

                return f

            def reset(self):
                self.funcs[0].reset()
                self.funcs[1].reset()

            def prox(self, xrr, factor=1.0, eps=consts.TOLERANCE,
                     max_iter=100):

                if isinstance(xrr, linalgs.MultipartArray):
                    xrr = xrr.get_parts()

                parts = [0, 0]

                parts[0] = self.funcs[0].prox(xrr[0], factor=factor,
                                              eps=eps, max_iter=max_iter)
                parts[1] = self.funcs[1].prox(xrr[1], factor=factor,
                                              eps=eps, max_iter=max_iter)

                return linalgs.MultipartArray(parts, vertical=True)

        class L1TV(properties.SplittableFunction,
                   properties.ProximalOperator):

            def __init__(self, l1, tv, p):
                self.l1 = float(l1)
                self.tv = float(tv)
                self.p = max(1, int(p))

                self.g = L1(l1)
                self.h = L1(tv)

            def reset(self):
                self.g.reset()
                self.h.reset()

            def f(self, x):
                """Function value.
                """
                x1 = x[:self.p, :]
                x2 = x[self.p:, :]

                return self.g.f(x1) \
                    + self.h.f(x2)

            def prox(self, x, factor=1.0, eps=consts.TOLERANCE,
                     max_iter=100):

                x1 = x[:self.p, :]
                x2 = x[self.p:, :]

                y = np.vstack((self.g.prox(x1, factor=factor,
                                           eps=eps, max_iter=max_iter),
                               self.h.prox(x2, factor=factor,
                                           eps=eps, max_iter=max_iter)))

                return y

        self.g = MultipleFunctions([RidgeSquaredError(X, y, l2, l=1.0 / rho,
                                                      penalty_start=penalty_start,
                                                      mean=mean),
                                    L1TV(l1 / rho, tv / rho, A[0].shape[1])],
                                   rho)

        if len(A) == 4:
            A = A[:2]

        self.h = LinearVariableConstraint(A, penalty_start=penalty_start,
                                          solver=linalgs.TridiagonalSolver())

        self.penalty_start = max(0, int(penalty_start))
        self.mean = bool(mean)

        self.reset()

    def prox(self, x, **kwargs):

        raise NotImplementedError("Use the prox of the parts of the split "
                                  "function, g.prox() and h.prox().")

    def update_rho(self, rho):
        """Update the penalty parameter.

        From the interface "AugmentedProximalOperator".
        """
        rho = max(0.0, float(rho))

        # Update the squared-error part ...
        self.g.funcs[0].l = 1.0 / rho
        # ... and the L1 and TV parts, whose constants are rescaled by rho.
        self.g.funcs[1].g.l = self.g.funcs[1].l1 / rho
        self.g.funcs[1].h.l = self.g.funcs[1].tv / rho

        self.g.rho = rho

        self.rho = rho


class PrincipalComponentAnalysisL1TV(properties.CompositeFunction,
                                     properties.NesterovFunction,
                                     properties.Continuation,
                                     properties.DualFunction,
                                     properties.StronglyConvex,
                                     properties.StepSize):
    """Combination (sum) of PCA (variance), L1 and TotalVariation.
    """
    def __init__(self, X, l, g, A=None, mu=0.0, penalty_start=0):
        """
        Parameters
        ----------
        X : Numpy array. The X matrix (the data) for the PCA.

        l : Non-negative float. The Lagrange multiplier, or regularisation
                constant, for the L1 penalty.

        g : Non-negative float. The Lagrange multiplier, or regularisation
                constant, of the smoothed TV function.

        A : Numpy array (usually sparse). The linear operator for the
                Nesterov formulation of TV. May not be None!

        mu : Non-negative float. The regularisation constant for the
                smoothing of the TV function.

        penalty_start : Non-negative integer. The number of columns,
                variables etc., to exempt from penalisation. Equivalently,
                the first index to be penalised. Default is 0, all columns
                are included.
        """
        self.X = X
        self.pca = LatentVariableVariance(X)
        self.l1 = L1(l, penalty_start=penalty_start)
        self.tv = TotalVariation(g, A=A, mu=mu, penalty_start=penalty_start)

        self.reset()
1716
1718
1719 self.pca.reset()
1720 self.l1.reset()
1721 self.tv.reset()
1722
1723 self._Xty = None
1724 self._invXXkI = None
1725 self._XtinvXXtkI = None
1726
1734
1736 """Returns the regularisation constant for the smoothing.
1737
1738 From the interface "NesterovFunction".
1739 """
1740 return self.tv.get_mu()
1741
1743 """Sets the regularisation constant for the smoothing.
1744
1745 From the interface "NesterovFunction".
1746
1747 Parameters:
1748 ----------
1749 mu : Non-negative float. The regularisation constant for the smoothing
1750 to use from now on.
1751
1752 Returns:
1753 -------
1754 old_mu : Non-negative float. The old regularisation constant for the
1755 smoothing that was overwritten and is no longer used.
1756 """
1757 return self.tv.set_mu(mu)
1758

    def f(self, beta):
        """Function value.
        """
        return self.pca.f(beta) \
            + self.l1.f(beta) \
            + self.tv.f(beta)

    def fmu(self, beta, mu=None):
        """Function value.
        """
        return self.pca.f(beta) \
            + self.l1.f(beta) \
            + self.tv.fmu(beta, mu)

    def phi(self, alpha, beta):
        """Function value with known alpha.
        """
        return self.pca.f(beta) \
            + self.l1.f(beta) \
            + self.tv.phi(alpha, beta)

    def grad(self, beta):
        """Gradient of the differentiable part of the function.

        From the interface "Gradient".
        """
        return self.pca.grad(beta) \
            + self.tv.grad(beta)

    def L(self):
        """Lipschitz constant of the gradient.

        From the interface "LipschitzContinuousGradient".
        """
        return self.pca.L() \
            + self.tv.L()

    def prox(self, beta, factor=1.0, **kwargs):
        """The proximal operator of the non-differentiable part of the
        function.

        From the interface "ProximalOperator".
        """
        return self.l1.prox(beta, factor, **kwargs)

    def estimate_mu(self, beta):
        """Computes a "good" value of mu with respect to the given beta.

        From the interface "NesterovFunction".
        """
        return self.tv.estimate_mu(beta)

    def M(self):
        """The maximum value of the regularisation of the dual variable. We
        have

            M = max_{alpha in K} 0.5 * |alpha|²_2.

        From the interface "NesterovFunction".
        """
        return self.tv.M()

    def mu_opt(self, eps):
        """The optimal value of mu given epsilon.

        From the interface "Continuation".
        """
        gM = self.tv.l * self.tv.M()

        # Evaluate the TV Lipschitz constant at mu = 1.
        old_mu = self.tv.set_mu(1.0)
        gA2 = self.tv.L()
        self.tv.set_mu(old_mu)

        # The smooth part of this function is the variance term.
        Lg = self.pca.L()

        return (-gM * gA2 + np.sqrt((gM * gA2) ** 2.0
                + gM * Lg * gA2 * eps)) \
            / (gM * Lg)

    def eps_opt(self, mu):
        """The optimal value of epsilon given mu.

        From the interface "Continuation".
        """
        gM = self.tv.l * self.tv.M()

        # Evaluate the TV Lipschitz constant at mu = 1.
        old_mu = self.tv.set_mu(1.0)
        gA2 = self.tv.L()
        self.tv.set_mu(old_mu)

        Lg = self.pca.L()

        return (2.0 * gM * gA2 * mu
                + gM * Lg * mu ** 2.0) \
            / gA2

    def eps_max(self, mu):
        """The maximum value of epsilon.

        From the interface "Continuation".

        Parameters
        ----------
        mu : Positive float. The regularisation constant of the smoothing.

        Returns
        -------
        eps : Positive float. The upper limit, the maximum, precision.
        """
        gM = self.tv.l * self.tv.M()

        return float(mu) * gM

    def mu_max(self, eps):
        """The maximum value of mu.

        From the interface "Continuation".

        Parameters
        ----------
        eps : Positive float. The maximum precision of the smoothing.

        Returns
        -------
        mu : Positive float. The upper limit, the maximum, of the
                regularisation constant of the smoothing.
        """
        gM = self.tv.l * self.tv.M()

        return float(eps) / gM

    def betahat(self, alphak, betak):
        """Returns the beta that minimises the dual function. Used when we
        compute the gap.

        From the interface "DualFunction".
        """
        raise NotImplementedError('Abstract method "betahat" must be '
                                  'specialised!')

    def gap(self, beta, beta_hat=None):
        """Compute the duality gap.

        From the interface "DualFunction".
        """
        raise NotImplementedError('Abstract method "gap" must be '
                                  'specialised!')

    def A(self):
        """Linear operator of the Nesterov function.

        From the interface "NesterovFunction".
        """
        return self.tv.A()

    def Aa(self, alpha):
        """Computes A^T.alpha.

        From the interface "NesterovFunction".
        """
        return self.tv.Aa(alpha)

    def project(self, a):
        """Projection onto the compact space of the Nesterov function.

        From the interface "NesterovFunction".
        """
        return self.tv.project(a)

    def parameter(self):
        """Returns the strongly convex parameter for the function.

        From the interface "StronglyConvex".
        """
        return self.rr.k

    def step(self, x):
        """The step size to use in descent methods.

        From the interface "StepSize".

        Parameters
        ----------
        x : Numpy array. The point at which to evaluate the step size.
        """
        return 1.0 / self.L()