From 702b091d73eeb65af3d80664ae998f578712351c Mon Sep 17 00:00:00 2001
From: junos
Date: Fri, 7 Jan 2022 17:00:12 +0100
Subject: [PATCH] Read RAPIDS features and create columns.

---
Reviewer notes (placed after the "---" separator, so they are not part of
the commit message):

* Header/imports: the recorded jupytext_version goes from 1.12.0 to 1.13.0
  and `here` is imported from pyprojroot; every path in the new cells is
  passed through here().
* Config cell: rapids/config.yaml is parsed with yaml.safe_load. The loop
  that follows only prints; for every top-level key whose value is a dict
  with a non-empty "PROVIDERS" entry, it prints the key, the provider name
  and that provider's "FEATURES" value, but only for providers whose
  "COMPUTE" value is truthy and that have a "FEATURES" key.
* Feature cells: all_sensor_features.csv is read with the two
  local_segment_*_datetime columns parsed as dates; `date_lj` is the date
  part of local_segment_start_datetime; `participant_id` is the first run
  of digits found in `pid`, converted with pd.to_numeric.
* The digit pattern is written as a raw string, r"(\d+)", so that "\d" is
  not an invalid string escape. The pattern value is unchanged, but the
  post-image blob hash on the "index" line below was computed for the
  non-raw spelling and no longer matches byte-for-byte.

 exploration/ex_ml_pipeline.py | 40 ++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/exploration/ex_ml_pipeline.py b/exploration/ex_ml_pipeline.py
index 328513a..39bbdbd 100644
--- a/exploration/ex_ml_pipeline.py
+++ b/exploration/ex_ml_pipeline.py
@@ -6,7 +6,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.12.0
+#       jupytext_version: 1.13.0
 #   kernelspec:
 #     display_name: straw2analysis
 #     language: python
@@ -24,6 +24,7 @@ import numpy as np
 import pandas as pd
 import seaborn as sns
 import yaml
+from pyprojroot import here
 from sklearn import linear_model
 from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
 
@@ -257,4 +258,41 @@ model_validation.cross_validate()
 # %%
 model_validation.groups
 
+# %% [markdown]
+# # Use RAPIDS
+
 # %%
+with open(here("rapids/config.yaml"), "r") as file:
+    rapids_config = yaml.safe_load(file)
+
+# %%
+for key in rapids_config.keys():
+    if isinstance(rapids_config[key], dict):  # Remove top-level configs
+        if "PROVIDERS" in rapids_config[key]:  # Retain features (that have providers)
+            if rapids_config[key]["PROVIDERS"]:  # Remove non-implemented features
+                for provider in rapids_config[key]["PROVIDERS"]:
+                    if rapids_config[key]["PROVIDERS"][provider][
+                        "COMPUTE"
+                    ]:  # Check that the features were actually calculated
+                        if "FEATURES" in rapids_config[key]["PROVIDERS"][provider]:
+                            print(key)
+                            print(provider)
+                            print(rapids_config[key]["PROVIDERS"][provider]["FEATURES"])
+
+# %%
+features_rapids = pd.read_csv(
+    here("rapids/data/processed/features/all_participants/all_sensor_features.csv"),
+    parse_dates=["local_segment_start_datetime", "local_segment_end_datetime"],
+)
+
+# %%
+features_rapids.columns
+
+# %%
+features_rapids = features_rapids.assign(
+    date_lj=lambda x: x.local_segment_start_datetime.dt.date
+)
+
+# %%
+features_rapids["participant_id"] = features_rapids["pid"].str.extract(r"(\d+)")
+features_rapids["participant_id"] = pd.to_numeric(features_rapids["participant_id"])