# -*- coding: utf-8 -*-
"""
The :mod:`parsimony.functions.properties` module contains base classes that
are used to assign properties, i.e. functionality, to the functions.

Try to keep the inheritance tree loop-free unless absolutely necessary.

Created on Mon Apr 22 10:54:29 2013

Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.

@author:  Tommy Löfstedt, Vincent Guillemot, Edouard Duchesnay and
          Fouad Hadj-Selem
@email:   lofstedt.tommy@gmail.com, edouard.duchesnay@cea.fr
@license: BSD 3-clause.
"""
import abc

import numpy as np
import scipy.sparse as sparse

import parsimony.utils.maths as maths
import parsimony.utils.consts as consts

__all__ = ["Function", "AtomicFunction", "CompositeFunction",
           "Penalty", "Constraint",
           "ProximalOperator", "ProjectionOperator",
           "CombinedProjectionOperator",
           "Continuation",
           "Gradient", "Hessian", "LipschitzContinuousGradient", "StepSize",
           "GradientMap", "DualFunction", "Eigenvalues", "StronglyConvex",
           "OR"]


class Function(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def f(self, *args, **kwargs):
        """Function value.
        """
        raise NotImplementedError('Abstract method "f" must be '
                                  'specialised!')

    def reset(self):
        """Free any cached computations from previous use of this Function.
        """
        pass

    def get_params(self, *args):
        """Return a dictionary with the requested parameters of this function,
        looked up by attribute name.
        """
        ret = dict()
        for k in args:
            ret[k] = getattr(self, k)

        return ret

    def set_params(self, **kwargs):
        """Set the given parameters of this function as attributes.
        """
        for k in kwargs:
            setattr(self, k, kwargs[k])


66 """This is a function that is not in general supposed to be minimised by
67 itself. Instead it should be combined with other atomic functions and
68 composite functions into composite functions.
69 """
70 __metaclass__ = abc.ABCMeta
71
74 """This is a function that is the combination (e.g. sum) of other
75 composite or atomic functions. It may also be a constrained function.
76 """
77 __metaclass__ = abc.ABCMeta
78
94 """Represents an indicator function.
95
96 I.e. f(x) = 0 if x is in the associated set and infinity otherwise.
97 """
98 __metaclass__ = abc.ABCMeta
99
100 @abc.abstractmethod
101 - def f(self, *args, **kwargs):
102 """Function value.
103 """
104 raise NotImplementedError('Abstract method "f" must be '
105 'specialised!')
106
109 """Represents a function that is the sum of two other functions such that
110
111 f(x) = g(x) + h(x),
112
113 i.e. that
114
115 self.f(x) = self.g.f(x) + self.h.f(x).
116
117 The first function, g(x), is accessed as self.g(...) and the second
118 function, h(x), is accessed as self.h(...).
119 """
120 __metaclass__ = abc.ABCMeta
121
123 """Function value.
124 """
125 return self.g.f(x) \
126 + self.h.f(x)
127
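# Illustrative sketch (not part of the original module): a minimal
# SplittableFunction whose two parts are supplied at construction time, so
# that f(x) = g.f(x) + h.f(x) as described above. The class name
# _ExampleSumFunction is hypothetical.
class _ExampleSumFunction(SplittableFunction):

    def __init__(self, g, h):
        # Both g and h must themselves implement the Function interface.
        self.g = g
        self.h = h

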
130 """Represents the penalisation of a function.
131
132 Penalties must take a parameter penalty_start, with default value 0.
133 Columns, variables or corresponding entities with indices smaller than
134 penalty_start must not be penalised.
135
136 Parameters
137 ----------
138 penalty_start : Non-negative integer. The number of columns, variables
139 etc., to except from penalisation. Equivalently, the first index
140 to be penalised. Default is 0, all columns are included.
141 """
142 __metaclass__ = abc.ABCMeta
143
147 """Represents a constraint of a function.
148
149 Constraints must take a parameter penalty_start, with default value 0.
150 Columns, variables or corresponding entities with indices smaller than
151 penalty_start must not be penalised.
152
153 Parameters
154 ----------
155 penalty_start : The number of columns, variables etc., to except from
156 penalisation. Equivalently, the first index to be penalised.
157 Default is 0, all columns are included.
158 """
159 __metaclass__ = abc.ABCMeta
160
161 @abc.abstractmethod
163 """Feasibility of the constraint at point x.
164 """
165 raise NotImplementedError('Abstract method "feasible" must be '
166 'specialised!')
167
class ProximalOperator(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def prox(self, x, factor=1.0, eps=consts.TOLERANCE, max_iter=100,
             index=0):
        """The proximal operator corresponding to the function.

        Parameters
        ----------
        x : Numpy array (p-by-1). The point at which to apply the proximal
                operator.

        factor : Positive float. A factor by which the Lagrange multiplier is
                scaled. This is usually the step size.

        eps : Positive float. This is the stopping criterion for inexact
                proximal methods, where the proximal operator is approximated
                numerically.

        max_iter : Positive integer. This is the maximum number of iterations
                for inexact proximal methods, where the proximal operator is
                approximated numerically.

        index : Non-negative integer. For multivariate functions, this
                identifies the variable with which the proximal operator is
                associated.
        """
        raise NotImplementedError('Abstract method "prox" must be '
                                  'specialised!')


203 """Given the problem
204
205 min. f(x)
206 s.t. x = z
207
208 the augmented Lagrangian is
209
210 L(x) = f(x) + y'(x - z) + (rho / 2) * ||x - z||²
211 === f(x) + (rho / 2) * ||x - z + u||²
212 = prox_{(1 / rho) * f}(z - u)
213
214 where y = rho * u is a dual variable associated to the constraint x = z,
215 and ||.||² is the squared L2 norm. We note that this is the proximal
216 operator of f(x) at the point z - u.
217
218 This Function represents the proximal operator of f at z - u, given the
219 augmented Lagrangian.
220
221 Parameters
222 ----------
223 rho : Non-negative float. The regularisation constant for the augmented
224 Lagrangian.
225 """
226 __metaclass__ = abc.ABCMeta
227
229
230 self.rho = max(0.0, float(rho))
231
233 """Update the penalty parameter.
234 """
235 rho = max(0.0, float(rho))
236 self.rho = rho
237
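# Illustrative sketch (not part of the original module): an augmented proximal
# operator for f(x) = lam * ||x||_1, whose proximal operator is the well-known
# soft-thresholding operator. In an ADMM x-update it would be called as
# prox(z - u, factor=1.0 / rho), exactly as described in the docstring above.
# The class name _ExampleL1AugmentedProx and the parameter lam are
# hypothetical.
class _ExampleL1AugmentedProx(AugmentedProximalOperator):

    def __init__(self, lam=1.0, rho=1.0):
        super(_ExampleL1AugmentedProx, self).__init__(rho=rho)
        self.lam = max(0.0, float(lam))

    def prox(self, x, factor=1.0, **kwargs):
        # Soft-thresholding of x with threshold factor * lam.
        t = factor * self.lam
        return np.sign(x) * np.maximum(0.0, np.abs(x) - t)

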
class ProjectionOperator(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def proj(self, x):
        """The projection operator corresponding to the function.
        """
        raise NotImplementedError('Abstract method "proj" must be '
                                  'specialised!')


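# Illustrative sketch (not part of the original module): a projection onto the
# L2 ball of radius c, which also exposes the corresponding feasibility check
# through the Constraint interface. The class name _ExampleL2BallProjection
# and the parameter c are hypothetical.
class _ExampleL2BallProjection(ProjectionOperator, Constraint):

    def __init__(self, c=1.0, penalty_start=0):
        self.c = max(0.0, float(c))
        self.penalty_start = max(0, int(penalty_start))

    def proj(self, x):
        # Scale the penalised part of x back onto the ball if necessary.
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x
        norm_x = maths.norm(x_)
        if norm_x <= self.c:  # Already feasible.
            return x
        x_ = (self.c / norm_x) * x_
        if self.penalty_start > 0:
            x_ = np.vstack((x[:self.penalty_start, :], x_))
        return x_

    def feasible(self, x):
        if self.penalty_start > 0:
            x_ = x[self.penalty_start:, :]
        else:
            x_ = x
        return maths.norm(x_) <= self.c

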
class CombinedProjectionOperator(Function, ProjectionOperator):

    def __init__(self, functions):
        """
        Parameters
        ----------
        functions : List or tuple with two or more elements. The functions
                whose projections are to be combined.
        """
        self.functions = functions

        # self.proj_op must be set to a projection algorithm with a
        # run(functions, x) method (e.g. Dykstra's alternating projection
        # algorithm); it is used by proj() below.

    def f(self, x):
        """Function value: the sum of the combined functions' values at x.
        """
        val = 0
        for func in self.functions:
            val += func.f(x)

        return val

    def proj(self, x):
        """The projection operator corresponding to the function.

        From the interface "ProjectionOperator".
        """
        proj = self.proj_op.run(self.functions, x)

        return proj


class Continuation(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def mu_opt(self, eps):
        """The optimal value of mu given epsilon.

        Parameters
        ----------
        eps : Positive float. The desired precision.

        Returns
        -------
        mu : Positive float. The optimal regularisation parameter.
        """
        raise NotImplementedError('Abstract method "mu_opt" must be '
                                  'specialised!')

    @abc.abstractmethod
    def eps_opt(self, mu):
        """The optimal value of epsilon given mu.

        Parameters
        ----------
        mu : Positive float. The regularisation constant of the smoothing.

        Returns
        -------
        eps : Positive float. The optimal precision.
        """
        raise NotImplementedError('Abstract method "eps_opt" must be '
                                  'specialised!')

    @abc.abstractmethod
    def eps_max(self, mu):
        """The maximum value of epsilon.

        Parameters
        ----------
        mu : Positive float. The regularisation constant of the smoothing.

        Returns
        -------
        eps : Positive float. The upper limit, the maximum, of the precision.
        """
        raise NotImplementedError('Abstract method "eps_max" must be '
                                  'specialised!')

    @abc.abstractmethod
    def mu_max(self, eps):
        """The maximum value of mu.

        Parameters
        ----------
        eps : Positive float. The maximum precision of the smoothing.

        Returns
        -------
        mu : Positive float. The upper limit, the maximum, of the
                regularisation constant of the smoothing.
        """
        raise NotImplementedError('Abstract method "mu_max" must be '
                                  'specialised!')


class Gradient(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def grad(self, beta):
        """Gradient of the function.

        Parameters
        ----------
        beta : Numpy array (p-by-1). The point at which to evaluate the
                gradient.
        """
        raise NotImplementedError('Abstract method "grad" must be '
                                  'specialised!')

    def approx_grad(self, x, eps=1e-4):
        """Numerical approximation of the gradient, using central differences.

        Parameters
        ----------
        x : Numpy array (p-by-1). The point at which to evaluate the
                gradient.

        eps : Positive float. The precision of the numerical solution.
                Smaller is better, but too small may result in floating point
                precision errors.
        """
        p = x.shape[0]
        grad = np.zeros(x.shape)
        if isinstance(self, (Penalty, Constraint)):
            start = self.penalty_start
        else:
            start = 0
        for i in xrange(start, p):
            x[i, 0] -= eps
            loss1 = self.f(x)
            x[i, 0] += 2.0 * eps
            loss2 = self.f(x)
            x[i, 0] -= eps
            grad[i, 0] = (loss2 - loss1) / (2.0 * eps)

        return grad


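# Illustrative sketch (not part of the original module): a minimal concrete
# Gradient implementation for the quadratic function f(x) = 0.5 * ||x||²_2,
# whose analytical gradient is simply x and can be verified against
# approx_grad(). The class name _ExampleQuadratic is hypothetical.
class _ExampleQuadratic(Function, Gradient):

    def f(self, x):
        # 0.5 * x'x.
        return 0.5 * np.sum(x ** 2.0)

    def grad(self, x):
        # The gradient of 0.5 * x'x is x itself.
        return x

# Example check (illustrative only):
#
#     >>> quad = _ExampleQuadratic()
#     >>> x = np.random.rand(5, 1)
#     >>> np.allclose(quad.grad(x), quad.approx_grad(x, eps=1e-6))
#     True

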
class Hessian(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def hessian(self, beta, vector=None):
        """The Hessian of the function.

        Parameters
        ----------
        beta : Numpy array (p-by-1). The point at which to evaluate the
                Hessian.

        vector : Numpy array (p-by-1). If not None, it is multiplied with the
                Hessian from the right.
        """
        raise NotImplementedError('Abstract method "hessian" must be '
                                  'specialised!')

    @abc.abstractmethod
    def hessian_inverse(self, beta, vector=None):
        """Inverse of the Hessian (second derivative) of the function.

        Sometimes this can be done efficiently if we know the structure of the
        Hessian. Also, if we multiply the Hessian by a vector, it is often
        possible to do so efficiently.

        Parameters
        ----------
        beta : Numpy array (p-by-1). The point at which to evaluate the
                Hessian.

        vector : Numpy array (p-by-1). If not None, it is multiplied with the
                inverse of the Hessian from the right.
        """
        raise NotImplementedError('Abstract method "hessian_inverse" must be '
                                  'specialised!')


class LipschitzContinuousGradient(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def L(self):
        """Lipschitz constant of the gradient.
        """
        raise NotImplementedError('Abstract method "L" must be '
                                  'specialised!')

    def approx_L(self, shape, max_iter=10000):
        """Monte Carlo approximation of the Lipschitz constant.

        Warning: This will not yield a good approximation within reasonable
        time for very large data sets. Use only if you know what you are
        doing.

        Parameters
        ----------
        shape : List or tuple. Usually has the form (p, 1). The shape of the
                points that we draw randomly.

        max_iter : Positive integer. The number of random pairs of points to
                draw. Default is 10000.
        """
        L = -float("inf")
        for i in xrange(max_iter):
            a = np.random.rand(*shape) * 2.0 - 1.0
            b = np.random.rand(*shape) * 2.0 - 1.0
            grad_a = self.grad(a)
            grad_b = self.grad(b)
            L_ = maths.norm(grad_a - grad_b) / maths.norm(a - b)
            L = max(L, L_)

        return L


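# Illustrative sketch (not part of the original module): the squared error
# function f(b) = 0.5 * ||Xb - y||²_2 has a Lipschitz continuous gradient, and
# the Lipschitz constant is the largest eigenvalue of X'X, i.e. the squared
# largest singular value of X. Estimates from approx_L() should lie below, and
# close to, this value. The class name _ExampleSquaredError is hypothetical.
class _ExampleSquaredError(Function, Gradient, LipschitzContinuousGradient):

    def __init__(self, X, y):
        self.X = X
        self.y = y

    def f(self, beta):
        err = np.dot(self.X, beta) - self.y
        return 0.5 * np.sum(err ** 2.0)

    def grad(self, beta):
        return np.dot(self.X.T, np.dot(self.X, beta) - self.y)

    def L(self):
        # Largest eigenvalue of X'X = squared spectral norm of X.
        return np.linalg.norm(self.X, ord=2) ** 2.0

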
class StepSize(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def step(self, beta, index=0):
        """The step size to use in descent methods.

        Parameters
        ----------
        beta : Numpy array. The point at which to determine the step size.

        index : Non-negative integer. For multiblock functions, the index of
                the variable for which the step size is computed.
        """
        raise NotImplementedError('Abstract method "step" must be '
                                  'specialised!')


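# Illustrative sketch (not part of the original module): a step size rule that
# returns the constant 1 / L, the classical fixed step size for gradient
# descent on a function with an L-Lipschitz continuous gradient. The class
# name _ExampleInverseLipschitzStep is hypothetical.
class _ExampleInverseLipschitzStep(LipschitzContinuousGradient, StepSize):

    def __init__(self, L):
        self._L = max(consts.TOLERANCE, float(L))

    def L(self):
        return self._L

    def step(self, beta, index=0):
        # The point beta is ignored; the step size is constant.
        return 1.0 / self._L

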
class GradientMap(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def V(self, alpha, beta, L):
        """The gradient map associated to the function.
        """
        raise NotImplementedError('Abstract method "V" must be '
                                  'specialised!')


class DualFunction(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def gap(self, beta, beta_hat=None):
        """Compute the duality gap at the point beta.
        """
        raise NotImplementedError('Abstract method "gap" must be '
                                  'specialised!')

    @abc.abstractmethod
    def betahat(self, alpha, beta=None):
        """Return the beta that minimises the dual function.
        """
        raise NotImplementedError('Abstract method "betahat" must be '
                                  'specialised!')


class Eigenvalues(object):

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def lambda_max(self):
        """Largest eigenvalue of the corresponding covariance matrix.
        """
        raise NotImplementedError('Abstract method "lambda_max" must be '
                                  'specialised!')

    def lambda_min(self):
        """Smallest eigenvalue of the corresponding covariance matrix.
        """
        raise NotImplementedError('Abstract method "lambda_min" is not '
                                  'implemented!')


536 """Represents strongly convex functions.
537
538 A function is strongly convex with parameter m if
539
540 (grad(f(x) - grad(f(y))'(x - y) >= m.||x - y||²_2,
541
542 or equivalently if
543
544 H(f(x)) >= mI,
545
546 where H is the Hessian, I is the identity matrix. The second ">=" means
547 that H(f(x)) - mI is positive semi-definite.
548 """
549 __metaclass__ = abc.ABCMeta
550
551 @abc.abstractmethod
553 """Returns the strongly convex parameter for the function.
554 """
555 raise NotImplementedError('Abstract method "parameter" is not '
556 'implemented!')
557
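# Illustrative sketch (not part of the original module): the ridge function
# f(x) = (m / 2) * ||x||²_2 is strongly convex with parameter m, since its
# Hessian is m * I. The class name _ExampleRidge is hypothetical.
class _ExampleRidge(Function, Gradient, StronglyConvex):

    def __init__(self, m=1.0):
        self.m = max(0.0, float(m))

    def f(self, x):
        return (self.m / 2.0) * np.sum(x ** 2.0)

    def grad(self, x):
        return self.m * x

    def parameter(self):
        # The strong convexity parameter.
        return self.m

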
class OR(object):
    """Combines several classes into a single condition that evaluates to True
    if a function is an instance of any one of them.
    """
    def __init__(self, *classes):
        self.classes = classes

    def evaluate(self, function):
        for c in self.classes:
            if isinstance(function, c):
                return True
        return False

    def __str__(self):
        string = str(self.classes[0])
        for i in xrange(1, len(self.classes)):
            string = string + " OR " + str(self.classes[i])
        return string


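# Example usage of OR (illustrative only): the condition is True whenever the
# given object is an instance of any of the listed classes.
#
#     >>> condition = OR(Penalty, Constraint)
#     >>> condition.evaluate(Penalty())
#     True
#     >>> condition.evaluate(5.0)
#     False

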
class NesterovFunction(Gradient,
                       LipschitzContinuousGradient,
                       Eigenvalues,
                       ProximalOperator):
    """Abstract superclass of Nesterov functions.

    Attributes
    ----------
    l : Non-negative float. The Lagrange multiplier, or regularisation
            constant, of the function.

    mu : Non-negative float. The Nesterov function regularisation constant for
            the smoothing.

    penalty_start : Non-negative integer. The number of columns, variables
            etc., to exempt from penalisation. Equivalently, the first index
            to be penalised. Default is 0, all columns are included.
    """
    __metaclass__ = abc.ABCMeta

    def __init__(self, l, A=None, mu=consts.TOLERANCE, penalty_start=0):
        """
        Parameters
        ----------
        l : Non-negative float. The Lagrange multiplier, or regularisation
                constant, of the function.

        A : A (usually sparse) array. The linear operator for the Nesterov
                formulation. May not be None!

        mu : Non-negative float. The regularisation constant for the
                smoothing.

        penalty_start : Non-negative integer. The number of columns, variables
                etc., to exempt from penalisation. Equivalently, the first
                index to be penalised. Default is 0, all columns are included.
        """
        self.l = float(l)
        if A is None:
            raise ValueError("The linear operator A must not be None.")
        self._A = A
        self.mu = float(mu)
        self.penalty_start = int(penalty_start)

        self._alpha = None
        # Cache for the largest eigenvalue computed in lambda_max.
        self._lambda_max = None

    def fmu(self, beta, mu=None):
        """Returns the smoothed function value.

        Parameters
        ----------
        beta : Numpy array. A weight vector.

        mu : Non-negative float. The regularisation constant for the
                smoothing.
        """
        if mu is None:
            mu = self.get_mu()

        alpha = self.alpha(beta)
        alpha_sqsum = 0.0
        for a in alpha:
            alpha_sqsum += np.sum(a ** 2.0)

        Aa = self.Aa(alpha)

        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        return self.l * (np.dot(beta_.T, Aa)[0, 0] - (mu / 2.0) * alpha_sqsum)

    @abc.abstractmethod
    def phi(self, alpha, beta):
        """Function value with known alpha.
        """
        raise NotImplementedError('Abstract method "phi" must be '
                                  'specialised!')

    def grad(self, beta):
        """Gradient of the function at beta.

        Parameters
        ----------
        beta : Numpy array. The point at which to evaluate the gradient.
        """
        if self.l < consts.TOLERANCE:
            return 0.0

        alpha = self.alpha(beta)

        if self.penalty_start > 0:
            grad = self.l * np.vstack((np.zeros((self.penalty_start, 1)),
                                       self.Aa(alpha)))
        else:
            grad = self.l * self.Aa(alpha)

        return grad

    def get_mu(self):
        """Return the regularisation constant for the smoothing.
        """
        return self.mu

    def set_mu(self, mu):
        """Set the regularisation constant for the smoothing.

        Parameters
        ----------
        mu : Non-negative float. The regularisation constant for the smoothing
                to use from now on.

        Returns
        -------
        old_mu : Non-negative float. The old regularisation constant for the
                smoothing that was overwritten and is no longer used.
        """
        old_mu = self.get_mu()

        self.mu = mu

        return old_mu

702 """Dual variable of the Nesterov function.
703
704 Parameters
705 ----------
706 beta : Numpy array (p-by-1). The variable for which to compute the dual
707 variable alpha.
708 """
709 if self.penalty_start > 0:
710 beta_ = beta[self.penalty_start:, :]
711 else:
712 beta_ = beta
713
714 A = self.A()
715 mu = self.get_mu()
716 alpha = [0] * len(A)
717 for i in xrange(len(A)):
718 alpha[i] = A[i].dot(beta_) * (1.0 / mu)
719
720
721 alpha = self.project(alpha)
722
723 return alpha
724
726 """ Linear operator of the Nesterov function.
727 """
728 return self._A
729
731 """ Linear operator of the Nesterov function multiplied by the
732 corresponding Lagrange multipliers.
733
734 Specialise this function if you need to. E.g. if you are smoothing a
735 sum of functions with different Lagrange multipliers.
736 """
737 A = self.A()
738 lA = [0] * len(A)
739 for i in xrange(len(A)):
740 lA[i] = self.l * A[i]
741
742 return lA
743
744 - def Aa(self, alpha):
745 """ Compute A'*alpha.
746
747 Parameters
748 ----------
749 alpha : List of numpy arrays (x-by-1). The dual variable alpha.
750 """
751 A = self.A()
752 Aa = A[0].T.dot(alpha[0])
753 for i in xrange(1, len(A)):
754 Aa += A[i].T.dot(alpha[i])
755
756 return Aa
757
    @abc.abstractmethod
    def project(self, alpha):
        """Projection onto the compact space of the Nesterov function.

        Parameters
        ----------
        alpha : List of numpy arrays (x-by-1). The not-yet-projected dual
                variable alpha.
        """
        raise NotImplementedError('Abstract method "project" must be '
                                  'specialised!')

    @abc.abstractmethod
    def M(self):
        """The maximum value of the regularisation of the dual variable. We
        have

            M = max_{alpha in K} 0.5 * ||alpha||²_2.
        """
        raise NotImplementedError('Abstract method "M" must be '
                                  'specialised!')

781 """ Compute a "good" value of mu with respect to the given beta.
782
783 Parameters
784 ----------
785 beta : Numpy array (p-by-1). The primal variable at which to compute a
786 feasible value of mu.
787 """
788 if self.penalty_start > 0:
789 beta_ = beta[self.penalty_start:, :]
790 else:
791 beta_ = beta
792
793 SS = 0.0
794 A = self.A()
795 for i in xrange(len(A)):
796 SS = max(SS, maths.norm(A[i].dot(beta_)))
797
798 return SS
799
801 """ Largest eigenvalue of the corresponding covariance matrix.
802
803 From the interface "Eigenvalues".
804 """
805
806
807
808 if self._lambda_max is None:
809
810 from parsimony.algorithms.nipals import FastSparseSVD
811
812 A = sparse.vstack(self.A())
813
814 v = FastSparseSVD().run(A)
815 us = A.dot(v)
816 self._lambda_max = np.sum(us ** 2.0)
817
818 return self._lambda_max
819
821 """ Lipschitz constant of the gradient.
822
823 From the interface "LipschitzContinuousGradient".
824 """
825 if self.l < consts.TOLERANCE:
826 return 0.0
827
828 lmaxA = self.lambda_max()
829
830 return self.l * lmaxA / self.mu
831
833 """The proximal operator corresponding to this function.
834
835 The proximal operator is computed numerically. This method should be
836 overloaded if the function has a known proximal operator.
837
838 From the interface "ProximalOperator".
839
840 Parameters
841 ----------
842 beta : Numpy array (p-by-1). The point at which to apply the proximal
843 operator.
844
845 factor : Positive float. A factor by which the Lagrange multiplier is
846 scaled. This is usually the step size.
847
848 eps : Positive float. This is the stopping criterion for inexact
849 proximal methods, where the proximal operator is approximated
850 numerically.
851
852 max_iter : Positive integer. This is the maximum number of iterations
853 for inexact proximal methods, where the proximal operator is
854 approximated numerically.
855 """
856
        # The dual of the proximal problem is minimised numerically below. The
        # inner class F defines that dual problem as a Function with a
        # gradient, a projection (used as its proximal operator) and a fixed
        # step size, so that it can be solved with an accelerated projected
        # gradient (FISTA) loop.
        class F(Function, Gradient, ProximalOperator, StepSize):
            def __init__(self, v, A, t, proj):
                self.v = v
                self.A = A
                self.t = t
                self.proj = proj

                self._step = None

            def f(self, a):
                return self.t * 0.5 \
                    * maths.norm(self.v - self.t * self.Ata(a)) ** 2.0

            def grad(self, a):
                return self.Av(-self.t * (self.v - self.t * self.Ata(a)))

            def prox(self, a, factor=1.0, eps=consts.TOLERANCE):
                # The "proximal operator" of the dual is the projection onto
                # the compact set of the dual variable.
                return self.proj(a)

            def step(self, x, index=0):
                if self._step is None:
                    from parsimony.algorithms.nipals import FastSparseSVD

                    A = sparse.vstack(self.A)
                    v = FastSparseSVD().run(A)
                    us = A.dot(v)
                    l = np.sum(us ** 2.0)

                    self._step = 1.0 / (self.t * self.t * l)

                return self._step

            def Av(self, v):
                # Apply each block of A to the vector v.
                A = self.A
                a = [0] * len(A)
                for i in xrange(len(A)):
                    a[i] = A[i].dot(v)

                return a

            def Ata(self, a):
                # Compute A' * a for the block-structured dual variable a.
                A = self.A
                x = A[0].T.dot(a[0])
                for i in xrange(1, len(A)):
                    x = x + A[i].T.dot(a[i])

                return x

        if self.penalty_start > 0:
            beta_ = beta[self.penalty_start:, :]
        else:
            beta_ = beta

        A = self.lA()
        t = factor
        f = F(beta_, A, t, self.project)

        if self._alpha is None:
            # Start from a random point, projected onto the compact set.
            alpha = [0] * len(A)
            for i in xrange(len(A)):
                alpha[i] = np.random.rand(A[i].shape[0], 1)
            alpha = f.prox(alpha)
        else:
            # Warm start from the alpha of the previous call.
            alpha = self._alpha

        alpha_ = alpha

        # Accelerated projected gradient (FISTA) iterations on the dual.
        y_padded = np.zeros(beta.shape)
        for it in xrange(1, max_iter + 1):

            # Momentum step.
            if it == 1:
                z = alpha
            else:
                z = [0] * len(alpha)
                for i in xrange(len(alpha)):
                    z[i] = alpha[i] \
                        + ((it - 2.0) / (it + 1.0)) * (alpha[i] - alpha_[i])

            alpha_ = alpha

            # Step size.
            step = f.step(z)

            # Gradient.
            grad = f.grad(z)

            # Gradient step for each block of the dual variable.
            for i in xrange(len(z)):
                z[i] -= step * grad[i]

            # Projection onto the compact set.
            alpha = f.prox(z)

            # Compute the duality gap at the current primal point y.
            Aa = A[0].T.dot(alpha[0])
            for i in xrange(1, len(A)):
                Aa = Aa + A[i].T.dot(alpha[i])
            y = beta_ - t * Aa
            y_padded[self.penalty_start:, :] = y
            gap = 0.5 * maths.norm(y - beta_) ** 2.0 \
                + factor * self.f(y_padded) \
                - 0.5 * (maths.norm(beta_) ** 2.0 - maths.norm(y) ** 2.0)

            if gap < eps:
                # Stop when the duality gap is small enough.
                break

        # Save the dual variable for a warm start in the next call.
        self._alpha = alpha

        if self.penalty_start > 0:
            y = np.vstack((beta[:self.penalty_start, :],
                           y))

        return y
999