diff --git a/src/features/phone_esm/straw/main.py b/src/features/phone_esm/straw/main.py index cc7131e8..e3ff1aca 100644 --- a/src/features/phone_esm/straw/main.py +++ b/src/features/phone_esm/straw/main.py @@ -1,11 +1,15 @@ import pandas as pd +from esm_preprocess import QUESTIONNAIRE_IDS + def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): esm_data = pd.read_csv(sensor_data_files["sensor_data"]) requested_features = provider["FEATURES"] # name of the features this function can compute + requested_scales = provider["SCALES"] base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support"] + #TODO Check valid questionnaire and feature names. # the subset of requested features this function can compute features_to_compute = list(set(requested_features) & set(base_features_names)) esm_features = pd.DataFrame(columns=["local_segment"] + features_to_compute) @@ -15,8 +19,11 @@ def straw_features(sensor_data_files, time_segment, provider, filter_data_by_seg if not esm_data.empty: esm_features = pd.DataFrame() - esm_features["mean"] = esm_data.groupby(["local_segment"])["esm_user_score"].mean() - #TODO Create the column esm_user_score in esm_clean. Currently, this is only done when reversing. + for scale in requested_scales: + questionnaire_id = QUESTIONNAIRE_IDS[scale] + mask = esm_data["questionnaire_id"] == questionnaire_id + esm_features[scale + "_mean"] = esm_data.loc[mask].groupby(["local_segment"])["esm_user_score"].mean() + #TODO Create the column esm_user_score in esm_clean. Currently, this is only done when reversing. esm_features = esm_features.reset_index() return esm_features