Extract method to reuse and simplify.

2023-07-03 20:52:08 +02:00 · 2023-07-03 20:52:08 +02:00 · 8c0b66eddc
parent e3ff4846e1
commit 8c0b66eddc
4 changed files with 40 additions and 24 deletions
--- a/features/esm.py
+++ b/features/esm.py
@ -316,3 +316,34 @@ def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
        )
    )
    return df_esm_clean
 def increment_answers(df_esm_clean: pd.DataFrame, increment_by: float = 1) -> pd.DataFrame:
    """
    Increment answers to keep in line with original scoring.

    We always used 0 for the lowest value of user answer.
    Some scales originally used other scoring, such as starting from 1.
    This restores original scoring so that the values are comparable to references.

    Parameters
    ----------
    df_esm_clean: pd.DataFrame
        A cleaned ESM dataframe, which must also include esm_user_answer_numeric.
    increment_by:
        A number to add to the user answer.

    Returns
    -------
    df_esm_clean: pd.DataFrame
        A copy of the input with an added column 'esm_user_score', equal to
        'esm_user_answer_numeric' + increment_by.
        If 'esm_user_answer_numeric' cannot be accessed (AttributeError),
        a hint is printed and the input is returned unchanged.
    """
    try:
        # Attribute access (rather than item access) is deliberate:
        # a missing column surfaces as the AttributeError handled below.
        df_esm_clean = df_esm_clean.assign(
            esm_user_score=lambda x: x.esm_user_answer_numeric + increment_by
        )
    except AttributeError as e:
        # Best effort: report the problem and fall through
        # to return the dataframe as it was passed in.
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
    return df_esm_clean
--- a/features/esm_COPE.py
+++ b/features/esm_COPE.py
@ -1,5 +1,7 @@
 import pandas as pd
 from features.esm import increment_answers
 COPE_ORIGINAL_MAX = 4
 COPE_ORIGINAL_MIN = 1
@ -187,15 +189,6 @@ def reassign_question_ids(df_cope_cleaned: pd.DataFrame) -> pd.DataFrame:
    )
    # Finally, increment numeric answers.
-    try:
+    df_cope_fixed = increment_answers(df_cope_fixed)
        df_cope_fixed = df_cope_fixed.assign(
            esm_user_score=lambda x: x.esm_user_answer_numeric + 1
        )
        # Increment the original answer by 1
        # to keep in line with traditional scoring
        # (from COPE_ORIGINAL_MIN - COPE_ORIGINAL_MAX).
    except AttributeError as e:
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
    return df_cope_fixed
--- a/features/esm_JCQ.py
+++ b/features/esm_JCQ.py
@ -1,5 +1,7 @@
 import pandas as pd
 from features.esm import increment_answers
 JCQ_ORIGINAL_MAX = 4
 JCQ_ORIGINAL_MIN = 1
@ -87,11 +89,9 @@ def reverse_jcq_demand_control_scoring(
            # In case there is an unexpected answer, raise an exception.
    try:
-        df_esm_jcq_demand_control = df_esm_jcq_demand_control.assign(
+        df_esm_jcq_demand_control = increment_answers(df_esm_jcq_demand_control)
            esm_user_score=lambda x: x.esm_user_answer_numeric + 1
        )
        # Increment the original answer by 1 to keep in line
-        # with traditional scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX).
+        # with traditional scoring (from JCQ_ORIGINAL_MIN to JCQ_ORIGINAL_MAX).
        df_esm_jcq_demand_control[
            df_esm_jcq_demand_control["question_id"].isin(
                DICT_JCQ_DEMAND_CONTROL_REVERSE.keys()
--- a/features/esm_SAM.py
+++ b/features/esm_SAM.py
@ -2,6 +2,7 @@ import numpy as np
 import pandas as pd
 import features.esm
 from features.esm import increment_answers
 SAM_ORIGINAL_MAX = 5
 SAM_ORIGINAL_MIN = 1
@ -506,15 +507,6 @@ def reassign_question_ids(df_sam_cleaned: pd.DataFrame) -> pd.DataFrame:
    )
    # Finally, increment numeric answers.
-    try:
+    df_sam_fixed = increment_answers(df_sam_fixed)
        df_sam_fixed = df_sam_fixed.assign(
            esm_user_score=lambda x: x.esm_user_answer_numeric + 1
        )
        # Increment the original answer by 1
        # to keep in line with traditional scoring
        # (from SAM_ORIGINAL_MIN - SAM_ORIGINAL_MAX).
    except AttributeError as e:
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
    return df_sam_fixed