Package mvpa :: Package featsel :: Module base
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.featsel.base

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Feature selection base classes and related helpers.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as np 
 14   
 15  from mvpa.featsel.helpers import FractionTailSelector 
 16  from mvpa.misc.state import StateVariable, ClassWithCollections 
 17   
 18  if __debug__: 
 19      from mvpa.base import debug 
 20   
class FeatureSelection(ClassWithCollections):
    """Abstract functor interface for feature selection procedures.

    Concrete selections implement the actual strategy by overriding
    `__call__`; this base class only hooks the object into the state
    collection machinery.
    """

    # ids of the features chosen by the most recent invocation
    # (disabled by default to avoid the bookkeeping cost)
    selected_ids = StateVariable(enabled=False)

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the collections base class."""
        ClassWithCollections.__init__(self, **kwargs)

    def __call__(self, dataset, testdataset=None):
        """Run the feature selection -- must be provided by a subclass.

        :Parameters:
          dataset : Dataset
            dataset used to select features
          testdataset : Dataset
            dataset that might be used to compute a stopping criterion

        Returns a tuple with the dataset containing the selected features.
        If present the tuple also contains the selected features of the
        test dataset. Derived classes must provide interface to access other
        relevant to the feature selection process information (e.g. mask,
        elimination step (in RFE), etc)
        """
        raise NotImplementedError

    def untrain(self):
        """'Untrain' feature selection.

        Needed for complete 'untraining' of classifiers. The default
        implementation is a no-op; concrete feature selections override
        it to forward the request to their sensitivities.
        """
        pass
class SensitivityBasedFeatureSelection(FeatureSelection):
    """Feature elimination.

    A `FeaturewiseDatasetMeasure` is used to compute sensitivity maps given a
    certain dataset. These sensitivity maps are in turn used to discard
    unimportant features.
    """

    # sensitivity map computed during the last call (disabled by default)
    sensitivity = StateVariable(enabled=False)

    def __init__(self,
                 sensitivity_analyzer,
                 feature_selector=FractionTailSelector(0.05),
                 **kwargs
                 ):
        """Initialize feature selection

        :Parameters:
          sensitivity_analyzer : FeaturewiseDatasetMeasure
            sensitivity analyzer to come up with sensitivity
          feature_selector : Functor
            Given a sensitivity map it has to return the ids of those
            features that should be kept.
        """
        # base init first
        FeatureSelection.__init__(self, **kwargs)

        # sensitivity analyzer to use once per invocation
        self.__sensitivity_analyzer = sensitivity_analyzer

        # functor which takes care about removing some features
        self.__feature_selector = feature_selector

    def untrain(self):
        """Forward 'untraining' to the embedded sensitivity analyzer."""
        if __debug__:
            debug("FS_", "Untraining sensitivity-based FS: %s" % self)
        self.__sensitivity_analyzer.untrain()

    def __call__(self, dataset, testdataset=None):
        """Select the most important features

        :Parameters:
          dataset : Dataset
            used to compute sensitivity maps
          testdataset : Dataset
            optional dataset to select features on

        Returns a tuple of two new datasets with selected feature
        subset of `dataset`.
        """
        # compute the sensitivity map
        sensitivity = self.__sensitivity_analyzer(dataset)

        self.sensitivity = sensitivity

        # select features to preserve
        selected_ids = self.__feature_selector(sensitivity)

        if __debug__:
            debug("FS_", "Sensitivity: %s Selected ids: %s" %
                  (sensitivity, selected_ids))

        # create a dataset only with selected features
        wdataset = dataset.selectFeatures(selected_ids)

        # idiomatic identity test (was: 'not testdataset is None')
        if testdataset is not None:
            wtestdataset = testdataset.selectFeatures(selected_ids)
        else:
            wtestdataset = None

        # Differ from the order in RFE when actually error reported is for
        results = (wdataset, wtestdataset)

        # WARNING: THIS MUST BE THE LAST THING TO DO ON selected_ids
        if not selected_ids.flags.writeable:
            # With numpy 1.7 sometimes it returns R/O arrays... not clear yet
            # why.  Dirty fix: work on a copy
            selected_ids = np.sort(selected_ids)
        else:
            selected_ids.sort()
        self.selected_ids = selected_ids

        # dataset with selected features is returned
        return results

    # make it accessible from outside
    sensitivity_analyzer = property(
        fget=lambda self: self.__sensitivity_analyzer,
        doc="Measure which was used to do selection")
156 157
class FeatureSelectionPipeline(FeatureSelection):
    """Feature elimination through the list of FeatureSelection's.

    Given a list of FeatureSelections it applies them in turn.
    """

    nfeatures = StateVariable(
        doc="Number of features before each step in pipeline")
    # TODO: may be we should also append resultant number of features?

    def __init__(self,
                 feature_selections,
                 **kwargs
                 ):
        """Initialize feature selection pipeline

        :Parameters:
          feature_selections : list of FeatureSelection
            selections which to use. Order matters
        """
        # base init first
        FeatureSelection.__init__(self, **kwargs)

        # selectors to apply in turn
        self.__feature_selections = feature_selections

    def untrain(self):
        """Forward 'untraining' to every selection in the pipeline."""
        if __debug__:
            debug("FS_", "Untraining FS pipeline: %s" % self)
        for fs in self.__feature_selections:
            fs.untrain()

    def __call__(self, dataset, testdataset=None, **kwargs):
        """Invocation of the feature selection

        Applies each embedded `FeatureSelection` in turn, feeding the
        output datasets of one step into the next.  Returns the tuple
        (selected dataset, selected testdataset-or-None).
        """
        wdataset = dataset
        wtestdataset = testdataset

        self.selected_ids = None

        # number of features at each step (before running selection)
        self.nfeatures = []

        for fs in self.__feature_selections:

            # enable selected_ids state if it was requested from this class
            fs.states._changeTemporarily(
                enable_states=["selected_ids"], other=self)
            if self.states.isEnabled("nfeatures"):
                self.nfeatures.append(wdataset.nfeatures)

            if __debug__:
                debug('FSPL', 'Invoking %s on (%s, %s)' %
                      (fs, wdataset, wtestdataset))
            wdataset, wtestdataset = fs(wdataset, wtestdataset, **kwargs)

            if self.states.isEnabled("selected_ids"):
                # BUG FIX: use 'is None', not '== None'.  selected_ids may be
                # a numpy array, and '== None' broadcasts element-wise with
                # modern numpy, making this 'if' raise
                # "truth value of an array is ambiguous"
                if self.selected_ids is None:
                    self.selected_ids = fs.selected_ids
                else:
                    # map this step's ids back through the previous selection
                    self.selected_ids = self.selected_ids[fs.selected_ids]

            fs.states._resetEnabledTemporarily()

        return (wdataset, wtestdataset)

    feature_selections = property(
        fget=lambda self: self.__feature_selections,
        doc="List of `FeatureSelections`")
228 229 230
class CombinedFeatureSelection(FeatureSelection):
    """Meta feature selection utilizing several embedded selection methods.

    Each embedded feature selection method is computed individually.
    Afterwards all feature sets are combined by either taking the union or
    intersection of all sets.

    The individual feature sets of all embedded methods are optionally
    available from the `selections_ids` state variable.
    """
    selections_ids = StateVariable(
        doc="List of feature id sets for each performed method.")

    def __init__(self, feature_selections, combiner, **kwargs):
        """
        :Parameters:
          feature_selections : list
            FeatureSelection instances to run. Order is not important.
          combiner : 'union', 'intersection'
            which method to be used to combine the feature selection set of
            all computed methods.
        """
        FeatureSelection.__init__(self, **kwargs)

        self.__feature_selections = feature_selections
        self.__combiner = combiner

    def untrain(self):
        """Forward 'untraining' to all embedded feature selections."""
        if __debug__:
            debug("FS_", "Untraining combined FS: %s" % self)
        for fs in self.__feature_selections:
            fs.untrain()

    def __call__(self, dataset, testdataset=None):
        """Run all embedded selections and combine their feature sets.

        :Parameters:
          dataset : Dataset
            used to compute the selections
          testdataset : Dataset
            optional dataset to apply the combined selection to as well

        Returns (selected dataset, selected testdataset-or-None).

        :Raises: `ValueError` if the combiner is neither 'union' nor
          'intersection' (detected from the second selection on).
        """
        # to hold the combined set
        selected_ids = None
        # to hold the individual sets
        self.selections_ids = []

        for fs in self.__feature_selections:
            # we need the feature ids that were selected by each method,
            # so enable them temporarily
            fs.states._changeTemporarily(
                enable_states=["selected_ids"], other=self)

            # compute feature selection, but ignore returned datasets
            fs(dataset, testdataset)

            # retrieve feature ids and determine the combination of all
            # selections ('is None', not '== None': idiomatic identity test)
            if selected_ids is None:
                selected_ids = set(fs.selected_ids)
            elif self.__combiner == 'union':
                selected_ids.update(fs.selected_ids)
            elif self.__combiner == 'intersection':
                selected_ids.intersection_update(fs.selected_ids)
            else:
                # version-portable raise (was Python-2-only 'raise E, msg')
                raise ValueError("Unknown combiner '%s'" % self.__combiner)

            # store individual set in state
            self.selections_ids.append(fs.selected_ids)

            # restore states to previous settings
            fs.states._resetEnabledTemporarily()

        # finally apply combined feature set selection to original datasets
        selected_ids = sorted(selected_ids)

        # take care of optional second dataset
        td_sel = None
        if testdataset is not None:
            # BUG FIX: use the freshly computed local 'selected_ids'; the
            # original read 'self.selected_ids', which is assigned only at
            # the very end of this method and hence was stale (from a prior
            # call) or unset here
            td_sel = testdataset.selectFeatures(selected_ids)

        # and main dataset
        d_sel = dataset.selectFeatures(selected_ids)

        # finally store ids in state
        self.selected_ids = selected_ids

        return (d_sel, td_sel)

    feature_selections = property(
        fget=lambda self: self.__feature_selections,
        doc="List of `FeatureSelections`")
    combiner = property(
        fget=lambda self: self.__combiner,
        doc="Selection set combination method.")
321