import json

import numpy as np
import pandas as pd

# Numeric codes of the ESM question types, as stored in the "esm_type" field.
ESM_TYPE = {
    "text": 1,
    "radio": 2,
    "checkbox": 3,
    "likert": 4,
    "quick_answers": 5,
    "scale": 6,
    "datetime": 7,
    "pam": 8,
    "number": 9,
    "web": 10,
    "date": 11,
}

# Mapping from questionnaire name to its numeric identifier.
QUESTIONNAIRE_IDS = {
    "sleep_quality": 1,
    "PANAS_positive_affect": 8,
    "PANAS_negative_affect": 9,
    "JCQ_job_demand": 10,
    "JCQ_job_control": 11,
    "JCQ_supervisor_support": 12,
    "JCQ_coworker_support": 13,
    "PFITS_supervisor": 14,
    "PFITS_coworkers": 15,
    "UWES_vigor": 16,
    "UWES_dedication": 17,
    "UWES_absorption": 18,
    "COPE_active": 19,
    "COPE_support": 20,
    "COPE_emotions": 21,
    "balance_life_work": 22,
    "balance_work_life": 23,
    "recovery_experience_detachment": 24,
    "recovery_experience_relaxation": 25,
    "symptoms": 26,
    "appraisal_stressfulness_event": 87,
    "appraisal_threat": 88,
    "appraisal_challenge": 89,
    "appraisal_event_time": 90,
    "appraisal_event_duration": 91,
    "appraisal_event_work_related": 92,
    "appraisal_stressfulness_period": 93,
    "late_work": 94,
    "work_hours": 95,
    "left_work": 96,
    "activities": 97,
    "coffee_breaks": 98,
    "at_work_yet": 99,
}

ESM_STATUS_ANSWERED = 2

GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]

SESSION_STATUS_UNANSWERED = "ema_unanswered"
SESSION_STATUS_DAY_FINISHED = "day_finished"
SESSION_STATUS_COMPLETE = "ema_completed"

ANSWER_DAY_FINISHED = "DayFinished3421"
ANSWER_DAY_OFF = "DayOff3421"
ANSWER_SET_EVENING = "DayFinishedSetEvening"

MAX_MORNING_LENGTH = 3
# When the participant was not yet at work at the time of the first (morning) EMA,
# only three items were answered:
# two sleep-related items and one indicating NOT starting work yet.
# Daytime EMAs are all longer; in fact, they always consist of at least 6 items.


def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Expand the JSON column of raw ESM data into separate dataframe columns.

    Every value of the "esm_json" column is parsed with ``json.loads`` and
    flattened with ``pd.json_normalize``; the resulting fields are appended
    to the original dataframe as new columns.

    Parameters
    ----------
    df_esm: pd.DataFrame
        A dataframe of esm data. It must contain an "esm_json" column holding
        JSON-encoded strings.

    Returns
    -------
    df_esm_preprocessed: pd.DataFrame
        The original dataframe (same rows, same index) with all fields from
        the "esm_json" column added as columns, except "esm_trigger".

    Raises
    ------
    ValueError
        If a JSON field other than "esm_trigger" has the same name as an
        existing column of ``df_esm`` (raised by ``DataFrame.join``).
    """
    parsed_records = df_esm["esm_json"].apply(json.loads).tolist()
    df_esm_json = (
        pd.json_normalize(parsed_records)
        # The esm_trigger column is already present in the main df.
        .drop(columns=["esm_trigger"], errors="ignore")
        .reset_index(drop=True)
    )
    # Pair the rows by position rather than by index label: the normalized
    # JSON frame is numbered 0..n-1, while df_esm may carry any index
    # (e.g. after filtering), which would misalign a label-based join.
    df_esm_preprocessed = df_esm.reset_index(drop=True).join(df_esm_json)
    df_esm_preprocessed.index = df_esm.index
    return df_esm_preprocessed


def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
    """
    Eliminate invalid ESM responses and extract numeric answers.

    Rows whose "esm_status" differs from ESM_STATUS_ANSWERED are removed, as
    are rows whose answer is one of the special markers (day finished, day
    off, set evening). For radio, scale and number items, the leading integer
    of the answer is extracted, e.g. 4 from "4 - I strongly agree" and 12
    from "12".

    Parameters
    ----------
    df_esm_preprocessed: pd.DataFrame
        A preprocessed dataframe of esm data with the columns "esm_status",
        "esm_user_answer" and "esm_type".

    Returns
    -------
    df_esm_clean: pd.DataFrame
        A filtered copy of the original dataframe with an added
        "esm_user_answer_numeric" column. The column is NaN for non-numeric
        item types and for answers that do not start with an integer.
    """
    is_answered = df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
    is_marker_answer = df_esm_preprocessed["esm_user_answer"].isin(
        [ANSWER_DAY_FINISHED, ANSWER_DAY_OFF, ANSWER_SET_EVENING]
    )
    # Work on an explicit copy so that adding a column never touches the
    # caller's dataframe and does not trigger chained-assignment warnings.
    df_esm_clean = df_esm_preprocessed[is_answered & ~is_marker_answer].copy()

    df_esm_clean["esm_user_answer_numeric"] = np.nan
    esm_type_numeric = [
        ESM_TYPE["radio"],
        ESM_TYPE["scale"],
        ESM_TYPE["number"],
    ]
    is_numeric_type = df_esm_clean["esm_type"].isin(esm_type_numeric)

    # Take the whole leading integer, not only the first character, so that
    # multi-digit answers such as "12" are not truncated to 1.
    leading_integer = (
        df_esm_clean.loc[is_numeric_type, "esm_user_answer"]
        .astype(str)
        .str.extract(r"^\s*(-?\d+)", expand=False)
    )
    # Assign by position (the boolean mask preserves row order) to stay
    # independent of index alignment; unparsable answers become NaN.
    df_esm_clean.loc[is_numeric_type, "esm_user_answer_numeric"] = pd.to_numeric(
        leading_integer, errors="coerce"
    ).to_numpy()

    return df_esm_clean