Read RAPIDS features and create columns.

rapids
junos 2022-01-07 17:00:12 +01:00
parent 257a044227
commit 702b091d73
1 changed file with 39 additions and 1 deletion

View File

@ -6,7 +6,7 @@
# extension: .py # extension: .py
# format_name: percent # format_name: percent
# format_version: '1.3' # format_version: '1.3'
# jupytext_version: 1.12.0 # jupytext_version: 1.13.0
# kernelspec: # kernelspec:
# display_name: straw2analysis # display_name: straw2analysis
# language: python # language: python
@ -24,6 +24,7 @@ import numpy as np
import pandas as pd import pandas as pd
import seaborn as sns import seaborn as sns
import yaml import yaml
from pyprojroot import here
from sklearn import linear_model from sklearn import linear_model
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
@ -257,4 +258,41 @@ model_validation.cross_validate()
# %% # %%
model_validation.groups model_validation.groups
# %% [markdown]
# # Use RAPIDS
# %% # %%
# Load the RAPIDS configuration from the project root.
# The encoding is pinned so that decoding the file does not depend on the
# platform's default locale encoding.
with open(here("rapids/config.yaml"), "r", encoding="utf-8") as file:
    rapids_config = yaml.safe_load(file)
# %%
# Print every (config section, provider, feature list) for which the provider
# has COMPUTE switched on and declares a FEATURES entry.
for key in rapids_config:
    section = rapids_config[key]
    if not isinstance(section, dict):  # Remove top-level configs
        continue
    if "PROVIDERS" not in section:  # Retain features (that have providers)
        continue
    section_providers = section["PROVIDERS"]
    if not section_providers:  # Remove non-implemented features
        continue
    for provider in section_providers:
        provider_settings = section_providers[provider]
        # Check that the features were actually calculated
        if not provider_settings["COMPUTE"]:
            continue
        if "FEATURES" not in provider_settings:
            continue
        print(key)
        print(provider)
        print(provider_settings["FEATURES"])
# %%
# Read the all-participants feature table from the RAPIDS output folder.
# Both segment boundary columns are parsed as datetimes while loading.
features_rapids = pd.read_csv(
    filepath_or_buffer=here(
        "rapids/data/processed/features/all_participants/all_sensor_features.csv"
    ),
    parse_dates=["local_segment_start_datetime", "local_segment_end_datetime"],
)
# %%
features_rapids.columns
# %%
# Add date_lj: the calendar date (time of day dropped) on which each
# segment starts.
features_rapids = features_rapids.assign(
    date_lj=features_rapids["local_segment_start_datetime"].dt.date
)
# %%
# Derive a numeric participant ID from the first run of digits in "pid".
# The pattern is a raw string because "\d" in a plain literal is an invalid
# escape sequence that Python warns about. expand=False makes str.extract
# return a Series, so a Series (not a one-column DataFrame) is assigned.
features_rapids["participant_id"] = features_rapids["pid"].str.extract(
    r"(\d+)", expand=False
)
features_rapids["participant_id"] = pd.to_numeric(features_rapids["participant_id"])