Read RAPIDS features and create columns.
parent
257a044227
commit
702b091d73
|
@ -6,7 +6,7 @@
|
||||||
# extension: .py
|
# extension: .py
|
||||||
# format_name: percent
|
# format_name: percent
|
||||||
# format_version: '1.3'
|
# format_version: '1.3'
|
||||||
# jupytext_version: 1.12.0
|
# jupytext_version: 1.13.0
|
||||||
# kernelspec:
|
# kernelspec:
|
||||||
# display_name: straw2analysis
|
# display_name: straw2analysis
|
||||||
# language: python
|
# language: python
|
||||||
|
@ -24,6 +24,7 @@ import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import yaml
|
import yaml
|
||||||
|
from pyprojroot import here
|
||||||
from sklearn import linear_model
|
from sklearn import linear_model
|
||||||
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
|
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
|
||||||
|
|
||||||
|
@ -257,4 +258,41 @@ model_validation.cross_validate()
|
||||||
# %%
|
# %%
|
||||||
model_validation.groups
|
model_validation.groups
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# # Use RAPIDS
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
# Load the RAPIDS configuration file located via pyprojroot's `here`.
# The encoding is passed explicitly so that parsing does not depend on the
# platform's locale default.
with open(here("rapids/config.yaml"), "r", encoding="utf-8") as file:
    rapids_config = yaml.safe_load(file)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Print every sensor/provider pair whose features were computed, together with
# the configured feature list.
for key, sensor_settings in rapids_config.items():
    # Skip top-level configs (plain values rather than sections).
    if not isinstance(sensor_settings, dict):
        continue
    # Keep only sections that have providers, and drop non-implemented ones
    # (an empty PROVIDERS entry).
    if "PROVIDERS" not in sensor_settings or not sensor_settings["PROVIDERS"]:
        continue
    providers = sensor_settings["PROVIDERS"]
    for provider in providers:
        provider_settings = providers[provider]
        # Only providers with COMPUTE switched on were actually calculated.
        if not provider_settings["COMPUTE"]:
            continue
        if "FEATURES" in provider_settings:
            print(key)
            print(provider)
            print(provider_settings["FEATURES"])
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Read the combined sensor feature table for all participants, parsing the
# segment start and end columns as datetimes.
features_rapids_path = here(
    "rapids/data/processed/features/all_participants/all_sensor_features.csv"
)
segment_datetime_columns = [
    "local_segment_start_datetime",
    "local_segment_end_datetime",
]
features_rapids = pd.read_csv(
    features_rapids_path, parse_dates=segment_datetime_columns
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
features_rapids.columns
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Add a `date_lj` column holding the calendar date of each segment's local
# start time.
segment_start = features_rapids["local_segment_start_datetime"]
features_rapids = features_rapids.assign(date_lj=segment_start.dt.date)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Derive a numeric participant ID from the first run of digits in `pid`.
# The pattern is a raw string: "\d" inside a plain string literal is an invalid
# escape sequence, which Python warns about.
# expand=False makes str.extract return a Series, so a single column is
# assigned rather than a one-column DataFrame.
features_rapids["participant_id"] = features_rapids["pid"].str.extract(
    r"(\d+)", expand=False
)
features_rapids["participant_id"] = pd.to_numeric(features_rapids["participant_id"])
|
||||||
|
|
Loading…
Reference in New Issue