import numpy as np
[docs]
class StabilityNovovicova:
"""
Computes the stability of feature selection algorithms based on Novovicová et al. (2009).
References:
Novovicová, J., Somol, P., & Pudil, P. (2009). "A New Measure of Feature Selection
Algorithms' Stability." IEEE International Conference on Data Mining Workshops.
"""
[docs]
def __init__(self, selected_features: list):
    """
    Store the feature selections and pre-compute the totals used for scoring.

    Args:
        selected_features: A list of sets or lists, one per dataset, each
            holding the features selected on that dataset.
    """
    # Validation is delegated to a helper defined elsewhere on the class.
    self._validate_inputs(selected_features)

    # Work on sets so a feature repeated within one selection counts once.
    subsets: list = list(map(set, selected_features))
    self.selected_features: list = subsets

    # n: number of datasets (i.e. number of selections supplied).
    self.n: int = len(subsets)
    # N: total feature occurrences summed over all de-duplicated selections.
    self.N: int = sum(map(len, subsets))
[docs]
def compute_stability(self) -> float:
    """
    Return the stability measure SH(S) for the stored feature selections.

    The score is ``sum(F_f * log2(F_f)) / (N * log2(n))`` where ``F_f`` is
    the number of selections containing feature ``f``, ``N`` is the total
    number of feature occurrences and ``n`` is the number of selections.

    Returns:
        Stability score; 0.0 when there are no feature occurrences or only
        a single selection.
    """
    # With one subset log2(n) is zero, and with no occurrences N is zero;
    # either way the normaliser below would be zero, so bail out early.
    if self.N == 0 or self.n == 1:
        return 0.0

    # Tally in how many selections each distinct feature appears.
    frequency: dict = {}
    for subset in self.selected_features:
        for item in subset:
            if item in frequency:
                frequency[item] += 1
            else:
                frequency[item] = 1

    # Features seen once contribute nothing because log2(1) == 0.
    weighted_total: float = sum(
        times_seen * np.log2(times_seen) for times_seen in frequency.values()
    )
    normaliser = self.N * np.log2(self.n)
    return weighted_total / normaliser