Extract method to reuse and simplify.

master
junos 2023-07-03 20:52:08 +02:00
parent e3ff4846e1
commit 8c0b66eddc
4 changed files with 40 additions and 24 deletions

View File

@ -316,3 +316,34 @@ def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
) )
) )
return df_esm_clean return df_esm_clean
def increment_answers(
    df_esm_clean: pd.DataFrame, increment_by: float = 1
) -> pd.DataFrame:
    """
    Increment answers to keep in line with original scoring.

    We always used 0 for the lowest value of user answer.
    Some scales originally used other scoring, such as starting from 1.
    This restores original scoring so that the values are comparable to references.

    Parameters
    ----------
    df_esm_clean: pd.DataFrame
        A cleaned ESM dataframe, which must also include esm_user_answer_numeric.
    increment_by:
        A number to add to the user answer. Defaults to 1.

    Returns
    -------
    df_esm_clean: pd.DataFrame
        A dataframe with an added column 'esm_user_score', holding
        esm_user_answer_numeric + increment_by.
        If the esm_user_answer_numeric column cannot be accessed,
        a hint is printed and the input dataframe is returned unchanged
        (i.e. without the 'esm_user_score' column).
    """
    try:
        df_esm_clean = df_esm_clean.assign(
            esm_user_score=lambda x: x.esm_user_answer_numeric + increment_by
        )
    except AttributeError as e:
        # Best-effort behaviour kept on purpose: attribute-style access to a
        # missing column raises AttributeError, which is reported to the user
        # instead of being propagated to the caller.
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
    return df_esm_clean

View File

@ -1,5 +1,7 @@
import pandas as pd import pandas as pd
from features.esm import increment_answers
COPE_ORIGINAL_MAX = 4 COPE_ORIGINAL_MAX = 4
COPE_ORIGINAL_MIN = 1 COPE_ORIGINAL_MIN = 1
@ -187,15 +189,6 @@ def reassign_question_ids(df_cope_cleaned: pd.DataFrame) -> pd.DataFrame:
) )
# Finally, increment numeric answers. # Finally, increment numeric answers.
try: df_cope_fixed = increment_answers(df_cope_fixed)
df_cope_fixed = df_cope_fixed.assign(
esm_user_score=lambda x: x.esm_user_answer_numeric + 1
)
# Increment the original answer by 1
# to keep in line with traditional scoring
# (from COPE_ORIGINAL_MIN - COPE_ORIGINAL_MAX).
except AttributeError as e:
print("Please, clean the dataframe first using features.esm.clean_up_esm.")
print(e)
return df_cope_fixed return df_cope_fixed

View File

@ -1,5 +1,7 @@
import pandas as pd import pandas as pd
from features.esm import increment_answers
JCQ_ORIGINAL_MAX = 4 JCQ_ORIGINAL_MAX = 4
JCQ_ORIGINAL_MIN = 1 JCQ_ORIGINAL_MIN = 1
@ -87,11 +89,9 @@ def reverse_jcq_demand_control_scoring(
# In case there is an unexpected answer, raise an exception. # In case there is an unexpected answer, raise an exception.
try: try:
df_esm_jcq_demand_control = df_esm_jcq_demand_control.assign( df_esm_jcq_demand_control = increment_answers(df_esm_jcq_demand_control)
esm_user_score=lambda x: x.esm_user_answer_numeric + 1
)
# Increment the original answer by 1 to keep in line # Increment the original answer by 1 to keep in line
# with traditional scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX). # with traditional scoring (from JCQ_ORIGINAL_MIN to JCQ_ORIGINAL_MAX).
df_esm_jcq_demand_control[ df_esm_jcq_demand_control[
df_esm_jcq_demand_control["question_id"].isin( df_esm_jcq_demand_control["question_id"].isin(
DICT_JCQ_DEMAND_CONTROL_REVERSE.keys() DICT_JCQ_DEMAND_CONTROL_REVERSE.keys()

View File

@ -2,6 +2,7 @@ import numpy as np
import pandas as pd import pandas as pd
import features.esm import features.esm
from features.esm import increment_answers
SAM_ORIGINAL_MAX = 5 SAM_ORIGINAL_MAX = 5
SAM_ORIGINAL_MIN = 1 SAM_ORIGINAL_MIN = 1
@ -506,15 +507,6 @@ def reassign_question_ids(df_sam_cleaned: pd.DataFrame) -> pd.DataFrame:
) )
# Finally, increment numeric answers. # Finally, increment numeric answers.
try: df_sam_fixed = increment_answers(df_sam_fixed)
df_sam_fixed = df_sam_fixed.assign(
esm_user_score=lambda x: x.esm_user_answer_numeric + 1
)
# Increment the original answer by 1
# to keep in line with traditional scoring
# (from SAM_ORIGINAL_MIN - SAM_ORIGINAL_MAX).
except AttributeError as e:
print("Please, clean the dataframe first using features.esm.clean_up_esm.")
print(e)
return df_sam_fixed return df_sam_fixed