"""Features derived from the Stress Appraisal Measure (SAM) ESM questionnaires."""

import pandas as pd

import features.esm

# Questionnaire IDs of the individual SAM parts, keyed by a descriptive name.
QUESTIONNAIRE_ID_SAM = {
    "event_stress": 87,
    "event_threat": 88,
    "event_challenge": 89,
    "event_time": 90,
    "event_duration": 91,
    "event_work_related": 92,
    "period_stress": 93,
}
# Inclusive bounds of the SAM questionnaire ID range.
QUESTIONNAIRE_ID_SAM_LOW = min(QUESTIONNAIRE_ID_SAM.values())
QUESTIONNAIRE_ID_SAM_HIGH = max(QUESTIONNAIRE_ID_SAM.values())

# Columns that identify a single ESM session.
GROUP_SESSIONS_BY = [
    "participant_id",
    "device_id",
    "esm_session",
]
# Columns that identify a single questionnaire within a session.
# Each questionnaire occurs only once within each esm_session on the same
# device within the same participant.
GROUP_QUESTIONNAIRES_BY = [*GROUP_SESSIONS_BY, "questionnaire_id"]


def extract_stressful_events(df_esm: pd.DataFrame) -> pd.DataFrame:
    """Return per-session threat and challenge means from raw ESM data.

    The data is passed through ``features.esm.preprocess_esm``, restricted to
    questionnaire IDs in the SAM range, passed through
    ``features.esm.clean_up_esm`` and finally summarised with
    ``calculate_threat_challenge_means``.

    Args:
        df_esm: ESM data accepted by ``features.esm.preprocess_esm``.
            NOTE(review): the preprocessed frame is expected to contain a
            ``questionnaire_id`` column -- confirm against that helper.

    Returns:
        The wide table produced by ``calculate_threat_challenge_means``.
    """
    # 0. Select only questions from Stress Appraisal Measure.
    df_esm_preprocessed = features.esm.preprocess_esm(df_esm)
    questionnaire_id = df_esm_preprocessed["questionnaire_id"]
    is_sam = (questionnaire_id >= QUESTIONNAIRE_ID_SAM_LOW) & (
        questionnaire_id <= QUESTIONNAIRE_ID_SAM_HIGH
    )
    df_esm_sam_clean = features.esm.clean_up_esm(df_esm_preprocessed[is_sam])

    # 1. Summarise the threat and challenge subscales of each event.
    return calculate_threat_challenge_means(df_esm_sam_clean)


def calculate_threat_challenge_means(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
    """Average the event threat and challenge answers of every ESM session.

    Args:
        df_esm_sam_clean: Long-format ESM answers containing the columns
            ``participant_id``, ``device_id``, ``esm_session``,
            ``questionnaire_id`` and ``esm_user_answer_numeric``.

    Returns:
        A data frame with one row per (participant_id, device_id,
        esm_session). Its columns form a two-level index: the top level is
        ``esm_numeric_mean`` and the second level, ``event_subscale``, holds
        ``threat_mean`` and ``challenge_mean``. A subscale with no answers in
        a session is NaN in that row.
    """
    threat_id = QUESTIONNAIRE_ID_SAM["event_threat"]
    challenge_id = QUESTIONNAIRE_ID_SAM["event_challenge"]

    # Select only threat and challenge assessments for events.
    df_threat_challenge = df_esm_sam_clean[
        df_esm_sam_clean["questionnaire_id"].isin([threat_id, challenge_id])
    ]

    # Calculate mean of threat and challenge subscales for each ESM session.
    df_means = (
        df_threat_challenge.groupby(GROUP_QUESTIONNAIRES_BY)["esm_user_answer_numeric"]
        .mean()
        .reset_index()
        .rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"})
    )

    # Rename questionnaire IDs to indicate their names. IDs absent from the
    # data are simply ignored by rename_categories.
    df_means["event_subscale"] = (
        df_means["questionnaire_id"]
        .astype("category")
        .cat.rename_categories(
            {threat_id: "threat_mean", challenge_id: "challenge_mean"}
        )
    )

    # Pivot the table so that each ESM session is represented by one row with
    # threat and challenge means as two columns. The index must not contain
    # questionnaire_id: it differs between the two subscales, so keeping it
    # would leave every session split across two half-empty rows.
    return pd.pivot(
        df_means,
        index=GROUP_SESSIONS_BY,
        columns=["event_subscale"],
        values=["esm_numeric_mean"],
    )