Source code for moosefs.feature_selectors.elastic_net_selector

from __future__ import annotations
from typing import Any
import warnings

import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet, LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import ConvergenceWarning
from .base_selector import FeatureSelector


class ElasticNetSelector(FeatureSelector):
    """Elastic-net based selector.

    • regression     → sklearn.linear_model.ElasticNet (L1 + L2 on y ∈ ℝ)
    • classification → sklearn.linear_model.LogisticRegression with
      penalty='elasticnet' (solver='saga')

    Scores are |coef| (mean over classes if multiclass).
    """

    name = "ElasticNet"

    def __init__(self, task: str, num_features_to_select: int, **kwargs: Any) -> None:
        super().__init__(task, num_features_to_select)
        self.kwargs = kwargs

    def compute_scores(self, X: Any, y: Any) -> np.ndarray:
        # Ensure tabular objects for column-safe slicing later in the pipeline.
        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
        if isinstance(y, (pd.DataFrame, np.ndarray)) and getattr(y, "ndim", 1) == 2:
            y = np.ravel(y)

        # Pop from a copy so repeated calls do not lose user-supplied kwargs.
        kwargs = dict(self.kwargs)

        if self.task == "regression":
            params = {
                "alpha": kwargs.pop("alpha", 1.0),
                "l1_ratio": kwargs.pop("l1_ratio", 0.5),
                "max_iter": kwargs.pop("max_iter", 100_000),
                **kwargs,
            }
            model = make_pipeline(
                StandardScaler(with_mean=True, with_std=True),
                ElasticNet(**params),
            )
            # Fit once, silencing only ConvergenceWarning.
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=ConvergenceWarning)
                model.fit(X, y)
            coef = model[-1].coef_
        elif self.task == "classification":
            # LogisticRegression parameterises regularisation strength with C
            # (the inverse of alpha); l1_ratio plays the same role as in ElasticNet.
            params = {
                "penalty": "elasticnet",
                "solver": "saga",
                "l1_ratio": kwargs.pop("l1_ratio", 0.5),
                "C": kwargs.pop("C", 1.0),
                "max_iter": kwargs.pop("max_iter", 100_000),
                **kwargs,
            }
            model = LogisticRegression(**params)
            model.fit(X, y)
            # coef_ has shape (n_classes, n_features) or (1, n_features);
            # average |coef| over classes for a single per-feature score.
            coef = np.mean(np.abs(model.coef_), axis=0)
        else:
            raise ValueError("Task must be 'classification' or 'regression'.")

        return np.abs(np.asarray(coef))
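

# ---------------------------------------------------------------------------
# Usage sketch (not part of the published API): a minimal smoke test of
# ElasticNetSelector on synthetic regression data. It assumes the
# FeatureSelector base class accepts (task, num_features_to_select) as shown
# in __init__ above; everything else uses only numpy/scikit-learn calls that
# already appear in this module. Run with
#   python -m moosefs.feature_selectors.elastic_net_selector
# so the relative import resolves.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 10))
    # y depends on features 0 and 3 only, so they should score highest.
    y = 3.0 * X[:, 0] - 2.0 * X[:, 3] + rng.normal(scale=0.1, size=200)

    selector = ElasticNetSelector(task="regression", num_features_to_select=2, alpha=0.01)
    scores = selector.compute_scores(X, y)
    print(np.argsort(scores)[::-1][:2])  # expected: indices 0 and 3 (either order)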