stress_at_work_analysis/machine_learning/feature_selection.py

45 lines
1.6 KiB
Python

import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.naive_bayes import GaussianNB
""" Feature selection pipeline: a methods that can be used in the wrapper metod alongside other wrapper contents (hyperparameter tuning etc.).
(1) Establish methods for each of the steps in feature selection protocol:
(a) feature selection inside specific sensors (sklearn method): returns most important features from all sensors
(b) feature selection between "tuned" sensors: returns filtered sensors, containing most important features returned with (a)
(2) Ensure that above methods are given only a part of data and use appropriate random seeds - to later simulate use case in production.
(3) Implement a method which gives graphical exploration of (1) (a) and (b) steps of the feature selection.
(4) Prepare a core method that can be fit into a wrapper (see sklearn wrapper methods) and integrates methods from (1)
"""
class FeatureSelection:
    """Scaffold for the feature-selection steps of the pipeline.

    Only the skeleton exists so far: the constructor keeps the data splits it
    is given, and every step is still a placeholder.

    The step methods are declared static because none of them reads instance
    state yet. That keeps ``FeatureSelection.step(...)`` working and also makes
    ``instance.step(...)`` callable with the same arguments (without the
    decorator the instance would be bound to the first parameter).
    """

    def __init__(self, X_train, X_test, y_train, y_test):
        """Keep the four data splits on the instance.

        The values are stored as-is; no validation or copying is performed.

        Args:
            X_train: Stored as ``self.X_train``.
            X_test: Stored as ``self.X_test``.
            y_train: Stored as ``self.y_train``.
            y_test: Stored as ``self.y_test``.
        """
        # TODO: what about leave-one-subject-out CV?
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test

    @staticmethod
    def within_sensors_feature_selection(estimator=None, scoring=None, tol=0.02):
        """Configure sequential feature selection and return the chosen features.

        The selector is only constructed here, not fitted, so the returned
        list is currently always empty.

        Args:
            estimator: Estimator handed to ``SequentialFeatureSelector``.
                Falls back to ``GaussianNB()`` when ``None``.
            scoring: Forwarded to ``SequentialFeatureSelector(scoring=...)``.
            tol: Forwarded to ``SequentialFeatureSelector(tol=...)``;
                defaults to the previously hard-coded ``0.02``.

        Returns:
            list: The selected features (empty until fitting is implemented).
        """
        if estimator is None:
            estimator = GaussianNB()

        # Can set n_features_to_select to an absolute value -> then remove
        # the tol parameter.
        sfs = SequentialFeatureSelector(
            estimator,
            n_features_to_select='auto',
            tol=tol,
            scoring=scoring,
        )
        # TODO: fit `sfs` on the data and collect the selected features.

        features_list = []
        return features_list

    @staticmethod
    def between_sensors_feature_selection():
        """Placeholder for selection between sensors; not implemented yet."""
        pass

    @staticmethod
    def vizualize_feature_selection_process():
        """Placeholder for plotting the selection steps; not implemented yet."""
        pass

    @staticmethod
    def execute_feature_selection_step():
        """Placeholder for the wrapper entry point; not implemented yet."""
        pass