Aggregate labels using grouping_variable.

rapids
junos 2021-08-20 19:17:22 +02:00
parent 08fdec34f1
commit 0b98d59aad
3 changed files with 35 additions and 2 deletions

View File

@ -16,6 +16,7 @@
# %%
# %matplotlib inline
import datetime
import importlib
import os
import sys
@ -156,6 +157,9 @@ lin_reg_proximity.score(
# %%
from machine_learning import pipeline
# %%
importlib.reload(pipeline)
# %%
with open("../machine_learning/config/minimal_features.yaml", "r") as file:
sensor_features_params = yaml.safe_load(file)
@ -204,3 +208,9 @@ labels.set_labels()
labels.get_labels("PANAS")
# %%
labels.aggregate_labels()
# %%
labels.get_aggregated_labels()
# %%

View File

@ -1,4 +1,4 @@
grouping_variable: date_lj
grouping_variable: [date_lj]
labels:
PANAS:
- PA

View File

@ -94,7 +94,7 @@ class SensorFeatures:
class Labels:
def __init__(
self,
grouping_variable: str,
grouping_variable: list,
labels: dict,
participants_usernames: Collection = None,
):
@ -113,6 +113,8 @@ class Labels:
self.df_esm_interest = pd.DataFrame()
self.df_esm_clean = pd.DataFrame()
self.df_esm_means = pd.DataFrame()
def set_labels(self):
self.df_esm = esm.get_esm_data(self.participants_usernames)
self.df_esm_preprocessed = esm.preprocess_esm(self.df_esm)
@ -135,6 +137,27 @@ class Labels:
else:
raise KeyError("This questionnaire has not been implemented as a label.")
def aggregate_labels(self):
    """Average the cleaned ESM answers per group and store them in wide form.

    Reads ``self.df_esm_clean`` and ``self.grouping_variable`` and writes the
    result to ``self.df_esm_means``. Nothing is returned.

    The numeric answers (``esm_user_answer_numeric``) are averaged for every
    combination of ``participant_id``, ``questionnaire_id`` and the grouping
    column(s). The means are then pivoted so that each questionnaire becomes
    its own column, the columns are renamed through
    ``QUESTIONNAIRE_IDS_RENAME``, and the frame is indexed by
    ``participant_id`` plus the grouping column(s).

    ``self.grouping_variable`` may be a single column name or any iterable
    of column names.
    """
    # A bare string (e.g. a config that says ``grouping_variable: date_lj``
    # instead of ``[date_lj]``) used to fail with
    # "can only concatenate list (not "str") to list"; normalise it here.
    grouping = self.grouping_variable
    if isinstance(grouping, str):
        grouping = [grouping]
    else:
        grouping = list(grouping)
    index_columns = ["participant_id"] + grouping

    # Long format: one row per participant / questionnaire / group.
    # Kept in a local so that self.df_esm_means is only replaced once the
    # whole aggregation has succeeded.
    mean_answers = (
        self.df_esm_clean.groupby(["participant_id", "questionnaire_id"] + grouping)
        .esm_user_answer_numeric.agg("mean")
        .reset_index()
        .rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"})
    )

    # Wide format: one column per questionnaire.
    # NOTE(review): QUESTIONNAIRE_IDS_RENAME is defined elsewhere in this
    # module; presumably it maps questionnaire ids to readable names - confirm.
    # The reset_index/set_index round trip is kept exactly as before so the
    # rename sees the same set of columns it always did.
    self.df_esm_means = (
        mean_answers.pivot(
            index=index_columns,
            columns="questionnaire_id",
            values="esm_numeric_mean",
        )
        .reset_index(col_level=1)
        .rename(columns=QUESTIONNAIRE_IDS_RENAME)
        .set_index(index_columns)
    )
def get_aggregated_labels(self):
    """Return the frame currently stored in ``self.df_esm_means``.

    The stored object itself is handed back, not a copy.
    """
    aggregated = self.df_esm_means
    return aggregated
def safe_outer_merge_on_index(left, right):
if left.empty: