diff --git a/exploration/ml_pipeline_feature_selection.py b/exploration/ml_pipeline_feature_selection.py new file mode 100644 index 0000000..902a184 --- /dev/null +++ b/exploration/ml_pipeline_feature_selection.py @@ -0,0 +1,36 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.13.0 +# kernelspec: +# display_name: straw2analysis +# language: python +# name: straw2analysis +# --- + +# %% +# %matplotlib inline + +import os +import sys + +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + +# %% +""" Feature selection pipeline: a set of methods that can be used in the wrapper method alongside other wrapper contents (hyperparameter tuning etc.). + +(1) Establish methods for each of the steps in the feature selection protocol: + (a) feature selection inside specific sensors (sklearn method): returns the most important features from all sensors + (b) feature selection between "tuned" sensors: returns filtered sensors, containing the most important features returned by (a) +(2) Ensure that the above methods are given only a part of the data and use appropriate random seeds - to later simulate the use case in production. +(3) Implement a method which gives a graphical exploration of steps (1) (a) and (b) of the feature selection. +(4) Prepare a core method that can be fit into a wrapper (see sklearn wrapper methods) and integrates the methods from (1). + +""" \ No newline at end of file