Implement a method to recode JCQ answers.

communication
junos 2021-07-05 18:24:22 +02:00
parent 459f7a2c72
commit e2808422db
2 changed files with 79 additions and 1 deletions

View File

@ -228,7 +228,8 @@ def classify_sessions_by_completion_time(
def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame: def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
""" """
This function eliminates invalid ESM responses. This function eliminates invalid ESM responses.
It removes unanswered ESMs. It removes unanswered ESMs and those that indicate end of work and similar.
It also extracts a numeric answer from strings such as "4 - I strongly agree".
Parameters Parameters
---------- ----------

View File

@ -1,3 +1,8 @@
import pandas as pd
# Bounds of the original Job Content Questionnaire (JCQ) answer scale (1-4).
# reverse_jcq_demand_control_scoring uses them to flip negatively phrased items:
# reversed score = JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - score.
JCQ_ORIGINAL_MAX = 4
JCQ_ORIGINAL_MIN = 1
dict_JCQ_demand_control_reverse = { dict_JCQ_demand_control_reverse = {
75: ( 75: (
"I was NOT asked", "I was NOT asked",
@ -29,3 +34,75 @@ dict_JCQ_demand_control_reverse = {
"Pri svojem delu sem imela zelo malo svobode", "Pri svojem delu sem imela zelo malo svobode",
), ),
} }
def reverse_jcq_demand_control_scoring(
    df_esm_jcq_demand_control: pd.DataFrame,
) -> pd.DataFrame:
    """
    Recode Job Content Questionnaire (JCQ) answers to the original 1-4 scoring.

    The numeric answers are first incremented by 1 to be in line with the
    original (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX) scoring. Answers to negatively
    phrased questions are then reversed (i.e. 1 becomes 4 etc.).
    The questions to reverse are listed in dict_JCQ_demand_control_reverse and
    identified by their question ID. Before recoding, the phrasing found in
    the data for each of these IDs is checked against the expected phrasings
    to protect against wrong numbering of questions (differing question IDs).

    Parameters
    ----------
    df_esm_jcq_demand_control: pd.DataFrame
        A cleaned up dataframe with the columns question_id, esm_instructions
        and esm_user_answer_numeric.

    Returns
    -------
    df_esm_jcq_demand_control: pd.DataFrame
        A copy of the dataframe with a column esm_user_score containing answers
        recoded and reversed. If esm_user_answer_numeric is missing, a hint is
        printed and the input dataframe is returned unchanged.

    Raises
    ------
    KeyError
        If a question ID marked for reversal occurs in the data with a phrasing
        that does not start with any of the expected phrasings. The offending
        phrasings are passed as the exception argument.
    """
    # Tabulate all distinct phrasings that occur for each question ID.
    # Missing phrasings cannot be compared, so they are left out.
    df_unique_phrasings = (
        df_esm_jcq_demand_control[["question_id", "esm_instructions"]]
        .dropna(subset=["esm_instructions"])
        .drop_duplicates()
    )

    # Look through all questions whose answers need to be reversed.
    for q_id, expected_phrasings in dict_JCQ_demand_control_reverse.items():
        # These are all phrasings found in the data for the given question ID.
        possible_answers = df_unique_phrasings.loc[
            df_unique_phrasings["question_id"] == q_id,
            "esm_instructions",
        ]
        if possible_answers.empty:
            # This question does not occur in the data, so there is nothing to check.
            continue
        # See if they are expected, i.e. included in the dictionary.
        answers_matches = possible_answers.str.startswith(expected_phrasings)
        if not answers_matches.all():
            # In case there is an unexpected phrasing, raise an exception.
            print("One of the answers that occur in the data should not be reversed.")
            print("This was the answer found in the data: ")
            raise KeyError(possible_answers[~answers_matches])

    try:
        # Bracket indexing raises KeyError for a missing column
        # (attribute access would raise AttributeError and bypass the hint below).
        answers_numeric = df_esm_jcq_demand_control["esm_user_answer_numeric"]
    except KeyError as e:
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
        return df_esm_jcq_demand_control

    # Increment the original answer by 1
    # to keep in line with traditional scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX).
    scores = answers_numeric + 1

    # Reverse the items that require it. Only the score column is touched,
    # so this also works when the dataframe index is not unique.
    needs_reversal = df_esm_jcq_demand_control["question_id"].isin(
        list(dict_JCQ_demand_control_reverse)
    )
    scores = scores.mask(
        needs_reversal, JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - scores
    )

    # assign returns a new dataframe, so the input is not modified.
    return df_esm_jcq_demand_control.assign(esm_user_score=scores)