Extract method to reuse.
parent
c688580fe8
commit
82b53bc0d3
|
@ -347,3 +347,69 @@ def increment_answers(df_esm_clean: pd.DataFrame, increment_by=1):
|
||||||
print("Please, clean the dataframe first using features.esm.clean_up_esm.")
|
print("Please, clean the dataframe first using features.esm.clean_up_esm.")
|
||||||
print(e)
|
print(e)
|
||||||
return df_esm_clean
|
return df_esm_clean
|
||||||
|
|
||||||
|
|
||||||
|
def reassign_question_ids(
    df_esm_cleaned: pd.DataFrame, question_ids_content: dict
) -> pd.DataFrame:
    """
    Fix question IDs to match their actual content.

    Unfortunately, when altering the protocol to adapt to the COVID pandemic,
    we did not retain the original question IDs.
    This means that the IDs of participants before 2021 differ
    from those of the rest of the participants.
    This function therefore ignores the existing IDs and assigns them anew
    by matching the beginning of each question's text ("instructions").

    Parameters
    ----------
    df_esm_cleaned: pd.DataFrame
        A cleaned up dataframe. Only its "esm_instructions" column is read.
    question_ids_content: dict
        A dictionary, linking question IDs with their content ("instructions").
        Each value is a string or a tuple of strings;
        a question matches an ID if its text starts with (one of) them.

    Returns
    -------
    df_esm_fixed: pd.DataFrame
        A copy of the dataframe with "question_id" set according to
        question_ids_content. The input dataframe is not modified.

    Raises
    ------
    KeyError
        If any question text in the data matches no entry
        of question_ids_content. The offending texts are the exception argument.
    """

    def find_question_id(instructions):
        # Return the first ID whose expected content begins the given text,
        # or None when the text was not anticipated in the dictionary.
        return next(
            (
                q_id
                for q_id, content in question_ids_content.items()
                if instructions.startswith(content)
            ),
            None,
        )

    # Resolve every distinct question text only once.
    unique_instructions = df_esm_cleaned["esm_instructions"].drop_duplicates()
    id_by_instructions = {
        instructions: find_question_id(instructions)
        for instructions in unique_instructions
    }

    # First, check that we anticipated all esm instructions.
    # The check deliberately does not look at the existing question IDs:
    # they are exactly what may be wrong, so only texts that match
    # no dictionary entry at all count as unexpected.
    undefined_questions = [
        instructions
        for instructions, q_id in id_by_instructions.items()
        if q_id is None
    ]
    if undefined_questions:
        print("One of the questions that occur in the data was undefined.")
        print("This were the questions found in the data: ")
        raise KeyError(undefined_questions)

    # Next, replace question IDs.
    df_esm_fixed = df_esm_cleaned.copy()
    df_esm_fixed["question_id"] = df_esm_cleaned["esm_instructions"].map(
        id_by_instructions
    )

    return df_esm_fixed
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
COPE_ORIGINAL_MAX = 4
|
COPE_ORIGINAL_MAX = 4
|
||||||
COPE_ORIGINAL_MIN = 1
|
COPE_ORIGINAL_MIN = 1
|
||||||
|
|
||||||
|
@ -125,65 +123,3 @@ DICT_COPE_QUESTION_IDS = {
|
||||||
"Razburil sem se in razmišljal samo o tem",
|
"Razburil sem se in razmišljal samo o tem",
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def reassign_question_ids(df_cope_cleaned: pd.DataFrame) -> pd.DataFrame:
    """
    Fix question IDs to match their actual content.

    Unfortunately, when altering the protocol to adapt to the COVID pandemic,
    we did not retain the original question IDs.
    This means that the IDs of participants before 2021 differ
    from those of the rest of the participants.
    This function therefore ignores the existing IDs and assigns them anew
    by matching the beginning of each question's text ("instructions")
    against DICT_COPE_QUESTION_IDS.

    Parameters
    ----------
    df_cope_cleaned: pd.DataFrame
        A cleaned up dataframe. Only its "esm_instructions" column is read.

    Returns
    -------
    df_cope_fixed: pd.DataFrame
        A copy of the dataframe with "question_id" set according to
        DICT_COPE_QUESTION_IDS. The input dataframe is not modified.

    Raises
    ------
    KeyError
        If any question text in the data matches no entry
        of DICT_COPE_QUESTION_IDS. The offending texts are the exception argument.
    """

    def find_question_id(instructions):
        # Return the first ID whose expected content begins the given text,
        # or None when the text was not anticipated in the dictionary.
        return next(
            (
                q_id
                for q_id, content in DICT_COPE_QUESTION_IDS.items()
                if instructions.startswith(content)
            ),
            None,
        )

    # Resolve every distinct question text only once.
    unique_instructions = df_cope_cleaned["esm_instructions"].drop_duplicates()
    id_by_instructions = {
        instructions: find_question_id(instructions)
        for instructions in unique_instructions
    }

    # First, check that we anticipated all esm instructions.
    # The check deliberately does not look at the existing question IDs:
    # they are exactly what may be wrong, so only texts that match
    # no dictionary entry at all count as unexpected.
    undefined_questions = [
        instructions
        for instructions, q_id in id_by_instructions.items()
        if q_id is None
    ]
    if undefined_questions:
        print("One of the questions that occur in the data was undefined.")
        print("This were the questions found in the data: ")
        raise KeyError(undefined_questions)

    # Next, replace question IDs.
    df_cope_fixed = df_cope_cleaned.copy()
    df_cope_fixed["question_id"] = df_cope_cleaned["esm_instructions"].map(
        id_by_instructions
    )

    return df_cope_fixed
|
|
||||||
|
|
|
@ -444,65 +444,3 @@ def extract_event_duration(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
|
||||||
# TODO: How many questions about the stressfulness of the period were asked
|
# TODO: How many questions about the stressfulness of the period were asked
|
||||||
# and how does this relate to events?
|
# and how does this relate to events?
|
||||||
|
|
||||||
|
|
||||||
def reassign_question_ids(df_sam_cleaned: pd.DataFrame) -> pd.DataFrame:
    """
    Fix question IDs to match their actual content.

    Unfortunately, when altering the protocol to adapt to the COVID pandemic,
    we did not retain the original question IDs.
    This means that the IDs of participants before 2021 differ
    from those of the rest of the participants.
    This function therefore ignores the existing IDs and assigns them anew
    by matching the beginning of each question's text ("instructions")
    against DICT_SAM_QUESTION_IDS.

    Parameters
    ----------
    df_sam_cleaned: pd.DataFrame
        A cleaned up dataframe. Only its "esm_instructions" column is read.

    Returns
    -------
    df_sam_fixed: pd.DataFrame
        A copy of the dataframe with "question_id" set according to
        DICT_SAM_QUESTION_IDS. The input dataframe is not modified.

    Raises
    ------
    KeyError
        If any question text in the data matches no entry
        of DICT_SAM_QUESTION_IDS. The offending texts are the exception argument.
    """

    def find_question_id(instructions):
        # Return the first ID whose expected content begins the given text,
        # or None when the text was not anticipated in the dictionary.
        return next(
            (
                q_id
                for q_id, content in DICT_SAM_QUESTION_IDS.items()
                if instructions.startswith(content)
            ),
            None,
        )

    # Resolve every distinct question text only once.
    unique_instructions = df_sam_cleaned["esm_instructions"].drop_duplicates()
    id_by_instructions = {
        instructions: find_question_id(instructions)
        for instructions in unique_instructions
    }

    # First, check that we anticipated all esm instructions.
    # The check deliberately does not look at the existing question IDs:
    # they are exactly what may be wrong, so only texts that match
    # no dictionary entry at all count as unexpected.
    undefined_questions = [
        instructions
        for instructions, q_id in id_by_instructions.items()
        if q_id is None
    ]
    if undefined_questions:
        print("One of the questions that occur in the data was undefined.")
        print("This were the questions found in the data: ")
        raise KeyError(undefined_questions)

    # Next, replace question IDs.
    df_sam_fixed = df_sam_cleaned.copy()
    df_sam_fixed["question_id"] = df_sam_cleaned["esm_instructions"].map(
        id_by_instructions
    )

    return df_sam_fixed
|
|
||||||
|
|
Loading…
Reference in New Issue