114 lines
4.2 KiB
Python
114 lines
4.2 KiB
Python
import pandas as pd
|
|
|
|
from features.esm import increment_answers
|
|
|
|
# Bounds of the original JCQ answer scale (1-4).
# Reversing an item maps a score s to JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - s.
JCQ_ORIGINAL_MIN = 1
JCQ_ORIGINAL_MAX = 4
|
|
|
|
# Questions whose scores must be reversed, keyed by question ID.
# Each value is a tuple of accepted beginnings of the question text; the data
# is checked against these with a prefix match, so several wordings of the
# same item (the strings appear to be English, Dutch and Slovenian versions,
# plus capitalisation/grammatical variants) can share one ID.
DICT_JCQ_DEMAND_CONTROL_REVERSE = {
    # Item: not being asked to do an excessive amount of work.
    75: (
        "I was NOT asked",
        "Men legde mij geen overdreven",
        "Men legde mij GEEN overdreven",  # Capitalized in some versions
        "Od mene se NI zahtevalo",
    ),
    # Item: having enough time to do the work.
    76: (
        "I had enough time to do my work",
        "Ik had voldoende tijd om mijn werk",
        "Imela sem dovolj časa, da končam",
        "Imel sem dovolj časa, da končam",
    ),
    # Item: being free of conflicting demands.
    77: (
        "I was free of conflicting demands",
        "Er werden mij op het werk geen tegenstrijdige",
        "Er werden mij op het werk GEEN tegenstrijdige",  # Capitalized in some versions
        "Pri svojem delu se NISEM srečeval",
    ),
    # Item: the job involving a lot of repetitive work.
    79: (
        "My job involved a lot of repetitive work",
        "Mijn taak omvatte veel repetitief werk",
        "Moje delo je vključevalo veliko ponavljajočega",
    ),
    # Item: having very little freedom on the job.
    85: (
        "On my job, I had very little freedom",
        "In mijn taak had ik zeer weinig vrijheid",
        "Pri svojem delu sem imel zelo malo svobode",
        "Pri svojem delu sem imela zelo malo svobode",
    ),
}
|
|
|
|
|
|
def reverse_jcq_demand_control_scoring(
    df_esm_jcq_demand_control: pd.DataFrame,
) -> pd.DataFrame:
    """
    Recode and reverse JCQ demand and control answers.

    The answers to the Job Content Questionnaire are first passed through
    ``increment_answers`` (incrementing them by 1 to be in line with the
    original 1-4 scoring). Then the scores of negatively phrased questions
    are reversed, i.e. a score ``s`` becomes
    ``JCQ_ORIGINAL_MAX + JCQ_ORIGINAL_MIN - s`` (1 becomes 4 etc.).

    The questions to reverse are listed in DICT_JCQ_DEMAND_CONTROL_REVERSE
    and selected by their question ID. To protect against wrong numbering of
    questions (differing question IDs), every question text found in the data
    under one of these IDs must start with one of the phrasings listed for
    that ID; otherwise nothing is recoded and an exception is raised.

    Parameters
    ----------
    df_esm_jcq_demand_control: pd.DataFrame
        A cleaned up dataframe with the columns question_id and
        esm_instructions, which must also include esm_user_answer_numeric.

    Returns
    -------
    df_esm_jcq_demand_control: pd.DataFrame
        The dataframe with a column esm_user_score
        containing answers recoded and reversed.
        If an AttributeError occurs while recoding, a hint is printed and the
        dataframe is returned as it was at that point, without raising.

    Raises
    ------
    KeyError
        If a question text in the data, stored under an ID that should be
        reversed, does not start with any of the expected phrasings.
        The exception carries the offending question texts.
    """
    # Tabulate every distinct question text observed for each question ID.
    # The count column is given an explicit name so that it cannot clash with
    # the esm_instructions index level when the index is reset.
    observed_phrasings = (
        df_esm_jcq_demand_control.groupby("question_id")
        .esm_instructions.value_counts()
        .reset_index(name="n_occurrences")
    )

    for q_id, expected_prefixes in DICT_JCQ_DEMAND_CONTROL_REVERSE.items():
        # All question texts that occur in the data under this question ID.
        phrasings = observed_phrasings.loc[
            observed_phrasings["question_id"] == q_id,
            "esm_instructions",
        ]
        # str.startswith accepts a tuple, so one call checks all alternatives.
        is_expected = phrasings.str.startswith(expected_prefixes)
        # Use `not` rather than `~`: bitwise inversion of a plain bool is
        # always truthy, so it must not be used on a scalar truth value.
        if not is_expected.all():
            print("One of the answers that occur in the data should not be reversed.")
            print("This was the answer found in the data: ")
            raise KeyError(phrasings[~is_expected])

    try:
        # Increment the original answer by 1 to keep in line
        # with traditional scoring (from JCQ_ORIGINAL_MIN to JCQ_ORIGINAL_MAX).
        df_esm_jcq_demand_control = increment_answers(df_esm_jcq_demand_control)

        needs_reversal = df_esm_jcq_demand_control["question_id"].isin(
            list(DICT_JCQ_DEMAND_CONTROL_REVERSE)
        )
        # Attribute access is deliberate: a missing esm_user_score column then
        # raises AttributeError, which is handled below.
        reversed_scores = (
            JCQ_ORIGINAL_MAX
            + JCQ_ORIGINAL_MIN
            - df_esm_jcq_demand_control.esm_user_score[needs_reversal]
        )
        # Reverse the items that require it, touching only the score column.
        df_esm_jcq_demand_control.loc[needs_reversal, "esm_user_score"] = (
            reversed_scores
        )
    except AttributeError as e:
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)

    return df_esm_jcq_demand_control
|