Package parsimony :: Package utils :: Module classif_label
[hide private]
[frames] | no frames]

Source Code for Module parsimony.utils.classif_label

 1  # -*- coding: utf-8 -*- 
 2  """ 
 3  Created on Mon Mar 10 13:52:23 2014 
 4   
 5  Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved. 
 6   
 7  @author:  Edouard Duchesnay 
 8  @email:   edouard.duchesnay@cea.fr 
 9  @license: BSD 3-clause. 
10  """ 
11  import numpy as np 
12   
13   
def class_weight_to_sample_weight(class_weight, y):
    """Convert per-class weights into per-sample weights.

    Every sample of class k receives the weight ``n / n_k * p_k`` where
    ``n`` is the number of samples, ``n_k`` the number of samples of class k
    and ``p_k`` the weight given to class k.

    Parameters
    ----------
    class_weight : dict, 'auto' or None
        If 'auto', class weights will be given inverse proportional
        to the frequency of the class in the data (``p_k = 1 / n_classes``);
        sample_weight will then sum to n_samples.
        If a dictionary is given, keys are classes and values
        are corresponding class weights. With two classes in {1, 0},
        class_weight = {0:0.5, 1:0.5} is equivalent to class_weight == "auto".
        If None (or an empty container) is given, the weights are uniform:
        sample_weight == 1.

    y : array-like, shape (n_samples,)
        Array of original class labels per sample. Labels may take any
        sortable values; they do not need to be 0, 1, ..., K-1.

    Returns
    -------
    weight_vect : ndarray, same shape as y
        Array with weight_vect[i] the weight for i-th sample.

    Raises
    ------
    ValueError
        If class_weight is not a dict, 'auto' or None.
    KeyError
        If class_weight is a dict lacking a label that is present in y.

    Examples
    --------
    >>> import numpy as np
    >>> y = np.array([1, 1, 1, 0, 0, 2])
    >>> w = class_weight_to_sample_weight("auto", y)
    >>> bool(np.isclose(w.sum(), len(y)))
    True
    >>> ["%i:%.2f" % (l, np.sum(w[y == l])) for l in np.unique(y)]
    ['0:2.00', '1:2.00', '2:2.00']
    >>> w2 = class_weight_to_sample_weight({0:1./3, 1:1./3, 2:1./3}, y)
    >>> bool(np.allclose(w2, w))
    True
    """
    # Convert first so that lists are accepted on every code path.
    y = np.asarray(y)

    no_weighting = class_weight is None or (
        hasattr(class_weight, "__len__") and len(class_weight) == 0)
    if no_weighting or y.size == 0:
        # uniform class weights (also avoids dividing by zero classes)
        return np.ones(y.shape, dtype=np.float64)

    # wik = n / nk * pk
    # pk: desired prior of class k (sum pk == 1)
    classes = np.unique(y)
    # Position of each sample's label inside the sorted ``classes`` array.
    class_index = np.searchsorted(classes, y)
    # Count per class *position* so that nk is aligned with ``classes``
    # whatever the label values are (negative, non-contiguous, ...).
    nk = np.bincount(class_index.ravel())
    n = float(y.shape[0])

    if class_weight == 'auto':
        pk = 1. / classes.shape[0]
    else:
        if not isinstance(class_weight, dict):
            raise ValueError("class_weight must be dict, 'auto', or None,"
                             " got: %r" % (class_weight,))
        pk = np.array([class_weight[k] for k in classes])

    wk = n / nk * pk
    sample_weight = wk[class_index]
    return sample_weight
68 69
def check_labels(y):
    """Ensure binary classification with 0, 1 labels.

    Parameters
    ----------
    y : array-like
        Class labels, with at most two distinct values.

    Returns
    -------
    y or y_recoded
        ``y`` itself, unchanged, when its sorted distinct labels are already
        0, 1, ... Otherwise a new float64 ndarray with the shape of ``y``
        in which the smallest label is recoded as 0 and the other one
        (if any) as 1.

    Raises
    ------
    ValueError
        If y contains more than two distinct labels.
    """
    nlevels = 2
    classes = np.unique(y)
    if len(classes) > nlevels:
        raise ValueError("Multinomial classification with more "
                         "than %i labels is not possible" % nlevels)
    classes_recoded = np.arange(len(classes))
    if np.all(classes_recoded == classes):
        return y
    # Ensure labels are 0, 1
    # Work on an ndarray so that elementwise comparison and ``.shape`` are
    # available even when a plain list was passed in.
    y_arr = np.asarray(y)
    y_recoded = np.zeros(y_arr.shape, dtype=np.float64)
    for code, label in zip(classes_recoded, classes):
        y_recoded[y_arr == label] = code
    return y_recoded
85