Read RAPIDS features and create columns.

rapids
junos 2022-01-07 17:00:12 +01:00
parent 257a044227
commit 702b091d73
1 changed file with 39 additions and 1 deletion

View File

@ -6,7 +6,7 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.12.0
# jupytext_version: 1.13.0
# kernelspec:
# display_name: straw2analysis
# language: python
@ -24,6 +24,7 @@ import numpy as np
import pandas as pd
import seaborn as sns
import yaml
from pyprojroot import here
from sklearn import linear_model
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
@ -257,4 +258,41 @@ model_validation.cross_validate()
# %%
model_validation.groups
# %% [markdown]
# # Use RAPIDS
# %%
# Load the RAPIDS configuration file (path resolved through here()).
# The encoding is pinned to UTF-8 so that decoding the YAML file does not
# depend on the platform's locale default.
with open(here("rapids/config.yaml"), "r", encoding="utf-8") as file:
    rapids_config = yaml.safe_load(file)
# %%
# Walk the RAPIDS configuration and print, for every provider whose COMPUTE
# flag is set, the top-level key, the provider name and its FEATURES entry.
for key, section in rapids_config.items():
    # Top-level settings that are not mappings cannot hold providers.
    if not isinstance(section, dict):
        continue
    # Skip sections with no PROVIDERS entry, or with an empty one.
    providers = section.get("PROVIDERS")
    if not providers:
        continue
    for provider in providers:
        settings = providers[provider]
        # Only providers whose COMPUTE flag is truthy are reported.
        if not settings["COMPUTE"]:
            continue
        if "FEATURES" not in settings:
            continue
        print(key)
        print(provider)
        print(settings["FEATURES"])
# %%
# Read the all-participants sensor feature table from the RAPIDS output and
# parse both segment boundary columns as datetimes.
rapids_features_path = here(
    "rapids/data/processed/features/all_participants/all_sensor_features.csv"
)
segment_datetime_columns = [
    "local_segment_start_datetime",
    "local_segment_end_datetime",
]
features_rapids = pd.read_csv(
    rapids_features_path, parse_dates=segment_datetime_columns
)
# %%
# Show the column index of the loaded feature table (displayed as the cell's
# output when run as a notebook cell).
features_rapids.columns
# %%
# Add a date_lj column holding the calendar date of each segment's local
# start timestamp.
segment_start_dates = features_rapids["local_segment_start_datetime"].dt.date
features_rapids = features_rapids.assign(date_lj=segment_start_dates)
# %%
# Derive a numeric participant_id from the first run of digits in pid.
# The pattern is a raw string: "\d" in a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
# expand=False makes the single capture group come back as a Series.
features_rapids["participant_id"] = pd.to_numeric(
    features_rapids["pid"].str.extract(r"(\d+)", expand=False)
)