diff --git a/features/esm.py b/features/esm.py index 3d27d77..94bc73e 100644 --- a/features/esm.py +++ b/features/esm.py @@ -316,3 +316,34 @@ def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame: ) ) return df_esm_clean + + +def increment_answers(df_esm_clean: pd.DataFrame, increment_by=1): + """ + Increment answers to keep in line with original scoring. + + We always used 0 for the lowest value of user answer. + Some scales originally used other scoring, such as starting from 1. + This restores original scoring so that the values are comparable to references. + + Parameters + ---------- + df_esm_clean: pd.DataFrame + A cleaned ESM dataframe, which must also include esm_user_answer_numeric. + increment_by: + A number to add to the user answer. + + Returns + ------- + df_esm_clean: pd.DataFrame + The same df with the addition of a column 'esm_user_score'. + + """ + try: + df_esm_clean = df_esm_clean.assign( + esm_user_score=lambda x: x.esm_user_answer_numeric + increment_by + ) + except AttributeError as e: + print("Please, clean the dataframe first using features.esm.clean_up_esm.") + print(e) + return df_esm_clean diff --git a/features/esm_COPE.py b/features/esm_COPE.py index 26cc19e..b48b155 100644 --- a/features/esm_COPE.py +++ b/features/esm_COPE.py @@ -1,5 +1,7 @@ import pandas as pd +from features.esm import increment_answers + COPE_ORIGINAL_MAX = 4 COPE_ORIGINAL_MIN = 1 @@ -187,15 +189,6 @@ def reassign_question_ids(df_cope_cleaned: pd.DataFrame) -> pd.DataFrame: ) # Finally, increment numeric answers. - try: - df_cope_fixed = df_cope_fixed.assign( - esm_user_score=lambda x: x.esm_user_answer_numeric + 1 - ) - # Increment the original answer by 1 - # to keep in line with traditional scoring - # (from COPE_ORIGINAL_MIN - COPE_ORIGINAL_MAX). 
- except AttributeError as e: - print("Please, clean the dataframe first using features.esm.clean_up_esm.") - print(e) + df_cope_fixed = increment_answers(df_cope_fixed) return df_cope_fixed diff --git a/features/esm_JCQ.py b/features/esm_JCQ.py index 035112c..a2222a9 100644 --- a/features/esm_JCQ.py +++ b/features/esm_JCQ.py @@ -1,5 +1,7 @@ import pandas as pd +from features.esm import increment_answers + JCQ_ORIGINAL_MAX = 4 JCQ_ORIGINAL_MIN = 1 @@ -87,11 +89,9 @@ def reverse_jcq_demand_control_scoring( # In case there is an unexpected answer, raise an exception. try: - df_esm_jcq_demand_control = df_esm_jcq_demand_control.assign( - esm_user_score=lambda x: x.esm_user_answer_numeric + 1 - ) + df_esm_jcq_demand_control = increment_answers(df_esm_jcq_demand_control) # Increment the original answer by 1 to keep in line - # with traditional scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX). + # with traditional scoring (from JCQ_ORIGINAL_MIN to JCQ_ORIGINAL_MAX). df_esm_jcq_demand_control[ df_esm_jcq_demand_control["question_id"].isin( DICT_JCQ_DEMAND_CONTROL_REVERSE.keys() diff --git a/features/esm_SAM.py b/features/esm_SAM.py index 585b9f5..ac416b9 100644 --- a/features/esm_SAM.py +++ b/features/esm_SAM.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd import features.esm +from features.esm import increment_answers SAM_ORIGINAL_MAX = 5 SAM_ORIGINAL_MIN = 1 @@ -506,15 +507,6 @@ def reassign_question_ids(df_sam_cleaned: pd.DataFrame) -> pd.DataFrame: ) # Finally, increment numeric answers. - try: - df_sam_fixed = df_sam_fixed.assign( - esm_user_score=lambda x: x.esm_user_answer_numeric + 1 - ) - # Increment the original answer by 1 - # to keep in line with traditional scoring - # (from SAM_ORIGINAL_MIN - SAM_ORIGINAL_MAX). - except AttributeError as e: - print("Please, clean the dataframe first using features.esm.clean_up_esm.") - print(e) + df_sam_fixed = increment_answers(df_sam_fixed) return df_sam_fixed