diff --git a/exploration/ml_pipeline_feature_selection.py b/machine_learning/feature_selection.py similarity index 55% rename from exploration/ml_pipeline_feature_selection.py rename to machine_learning/feature_selection.py index 902a184..690712f 100644 --- a/exploration/ml_pipeline_feature_selection.py +++ b/machine_learning/feature_selection.py @@ -1,21 +1,3 @@ -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.13.0 -# kernelspec: -# display_name: straw2analysis -# language: python -# name: straw2analysis -# --- - -# %% -# %matplotlib inline - import os import sys @@ -23,7 +5,10 @@ import numpy as np import matplotlib.pyplot as plt import pandas as pd -# %% +from sklearn.feature_selection import SequentialFeatureSelector +from sklearn.naive_bayes import GaussianNB + + """ Feature selection pipeline: methods that can be used in the wrapper method alongside other wrapper contents (hyperparameter tuning etc.). (1) Establish methods for each of the steps in feature selection protocol: @@ -33,4 +18,28 @@ (3) Implement a method which gives graphical exploration of (1) (a) and (b) steps of the feature selection. (4) Prepare a core method that can be fit into a wrapper (see sklearn wrapper methods) and integrates methods from (1) -""" \ No newline at end of file +""" + +class FeatureSelection: + + def __init__(self, X_train, X_test, y_train, y_test): # TODO: what about leave-one-subject-out CV? + pass + + + def within_sensors_feature_selection(estimator, scoring, tol): + features_list = [] + + nb = GaussianNB() + sfs = SequentialFeatureSelector(nb, n_features_to_select='auto', tol=0.02) # Can set n_features to an absolute value -> then remove tol parameter. 
+ + + return features_list + + def between_sensors_feature_selection(): + pass + + def vizualize_feature_selection_process(): + pass + + def execute_feature_selection_step(): + pass \ No newline at end of file