diff --git a/features/esm_COPE.py b/features/esm_COPE.py index f356625..26cc19e 100644 --- a/features/esm_COPE.py +++ b/features/esm_COPE.py @@ -128,6 +128,25 @@ DICT_COPE_QUESTION_IDS = { def reassign_question_ids(df_cope_cleaned: pd.DataFrame) -> pd.DataFrame: + """ + Fix question IDs to match their actual content. + + Unfortunately, when altering the protocol to adapt to the COVID pandemic, + we did not retain the original question IDs. + This means that the IDs for participants before 2021 are different + from those for the rest of the participants. + This function searches for question IDs by matching the question strings. + + Parameters + ---------- + df_cope_cleaned: pd.DataFrame + A cleaned-up dataframe, which must also include esm_user_answer_numeric. + + Returns + ------- + df_cope_fixed: pd.DataFrame + The same dataframe but with fixed question IDs. + """ df_esm_cope_unique_questions = ( df_cope_cleaned.groupby("question_id") .esm_instructions.value_counts() diff --git a/features/esm_SAM.py b/features/esm_SAM.py index a1f46d7..585b9f5 100644 --- a/features/esm_SAM.py +++ b/features/esm_SAM.py @@ -447,6 +447,25 @@ def extract_event_duration(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame: def reassign_question_ids(df_sam_cleaned: pd.DataFrame) -> pd.DataFrame: + """ + Fix question IDs to match their actual content. + + Unfortunately, when altering the protocol to adapt to the COVID pandemic, + we did not retain the original question IDs. + This means that the IDs for participants before 2021 are different + from those for the rest of the participants. + This function searches for question IDs by matching the question strings. + + Parameters + ---------- + df_sam_cleaned: pd.DataFrame + A cleaned-up dataframe, which must also include esm_user_answer_numeric. + + Returns + ------- + df_sam_fixed: pd.DataFrame + The same dataframe but with fixed question IDs. + """ df_esm_sam_unique_questions = ( df_sam_cleaned.groupby("question_id") .esm_instructions.value_counts()