rapids/src/features/phone_esm/straw/esm_JCQ.py

109 lines
4.2 KiB
Python
Raw Normal View History

2022-03-16 18:55:30 +01:00
import pandas as pd
JCQ_ORIGINAL_MAX = 4
JCQ_ORIGINAL_MIN = 1
dict_JCQ_demand_control_reverse = {
75: (
"I was NOT asked",
"Men legde mij geen overdreven",
"Men legde mij GEEN overdreven", # Capitalized in some versions
"Od mene se NI zahtevalo",
),
76: (
"I had enough time to do my work",
"Ik had voldoende tijd om mijn werk",
"Imela sem dovolj časa, da končam",
"Imel sem dovolj časa, da končam",
),
77: (
"I was free of conflicting demands",
"Er werden mij op het werk geen tegenstrijdige",
"Er werden mij op het werk GEEN tegenstrijdige", # Capitalized in some versions
"Pri svojem delu se NISEM srečeval",
),
79: (
"My job involved a lot of repetitive work",
"Mijn taak omvatte veel repetitief werk",
"Moje delo je vključevalo veliko ponavljajočega",
),
85: (
"On my job, I had very little freedom",
"In mijn taak had ik zeer weinig vrijheid",
"Pri svojem delu sem imel zelo malo svobode",
"Pri svojem delu sem imela zelo malo svobode",
),
}
def reverse_jcq_demand_control_scoring(
df_esm_jcq_demand_control: pd.DataFrame,
) -> pd.DataFrame:
"""
This function recodes answers in Job content questionnaire by first incrementing them by 1,
to be in line with original (1-4) scoring.
Then, some answers are reversed (i.e. 1 becomes 4 etc.), because the questions are negatively phrased.
These answers are listed in dict_JCQ_demand_control_reverse and identified by their question ID.
However, the existing data is checked against literal phrasing of these questions
to protect against wrong numbering of questions (differing question IDs).
Parameters
----------
df_esm_jcq_demand_control: pd.DataFrame
A cleaned up dataframe, which must also include esm_user_answer_numeric.
Returns
-------
df_esm_jcq_demand_control: pd.DataFrame
The same dataframe with a column esm_user_score containing answers recoded and reversed.
"""
df_esm_jcq_demand_control_unique_answers = (
df_esm_jcq_demand_control.groupby("question_id")
.esm_instructions.value_counts()
.rename()
.reset_index()
)
# Tabulate all possible answers to each question (group by question ID).
for q_id in dict_JCQ_demand_control_reverse.keys():
# Look through all answers that need to be reversed.
possible_answers = df_esm_jcq_demand_control_unique_answers.loc[
df_esm_jcq_demand_control_unique_answers["question_id"] == q_id,
"esm_instructions",
]
# These are all answers to a given question (by q_id).
answers_matches = possible_answers.str.startswith(
dict_JCQ_demand_control_reverse.get(q_id)
)
# See if they are expected, i.e. included in the dictionary.
if ~answers_matches.all():
print("One of the answers that occur in the data should not be reversed.")
print("This was the answer found in the data: ")
raise KeyError(possible_answers[~answers_matches])
# In case there is an unexpected answer, raise an exception.
try:
df_esm_jcq_demand_control = df_esm_jcq_demand_control.assign(
esm_user_score=lambda x: x.esm_user_answer_numeric + 1
)
# Increment the original answer by 1
# to keep in line with traditional scoring (JCQ_ORIGINAL_MIN - JCQ_ORIGINAL_MAX).
df_esm_jcq_demand_control[
df_esm_jcq_demand_control["question_id"].isin(
dict_JCQ_demand_control_reverse.keys()
)
] = df_esm_jcq_demand_control[
df_esm_jcq_demand_control["question_id"].isin(
dict_JCQ_demand_control_reverse.keys()
)
].assign(
esm_user_score=lambda x: JCQ_ORIGINAL_MAX
+ JCQ_ORIGINAL_MIN
- x.esm_user_score
)
# Reverse the items that require it.
except AttributeError as e:
print("Please, clean the dataframe first using features.esm.clean_up_esm.")
print(e)
return df_esm_jcq_demand_control