Specify format directly as infer_datetime_format was deprecated.

master
junos 2023-05-16 17:04:48 +02:00
parent 9417a1b9f1
commit 118e686491
1 changed files with 23 additions and 13 deletions

View File

@ -20,7 +20,8 @@ GROUP_QUESTIONNAIRES_BY = [
"device_id", "device_id",
"esm_session", "esm_session",
] ]
# Each questionnaire occurs only once within each esm_session on the same device within the same participant. # Each questionnaire occurs only once within each esm_session on the same device
# within the same participant.
def extract_stressful_events(df_esm: pd.DataFrame) -> pd.DataFrame: def extract_stressful_events(df_esm: pd.DataFrame) -> pd.DataFrame:
@ -78,7 +79,8 @@ def extract_stressful_events(df_esm: pd.DataFrame) -> pd.DataFrame:
def calculate_threat_challenge_means(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame: def calculate_threat_challenge_means(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
""" """
This function calculates challenge and threat (two Stress Appraisal Measure subscales) means, This function calculates challenge and threat
(two Stress Appraisal Measure subscales) means,
for each ESM session (within participants and devices). for each ESM session (within participants and devices).
It creates a grouped dataframe with means in two columns. It creates a grouped dataframe with means in two columns.
@ -90,7 +92,8 @@ def calculate_threat_challenge_means(df_esm_sam_clean: pd.DataFrame) -> pd.DataF
Returns Returns
------- -------
df_esm_event_threat_challenge_mean_wide: pd.DataFrame df_esm_event_threat_challenge_mean_wide: pd.DataFrame
A dataframe of unique ESM sessions (by participants and devices) with threat and challenge means. A dataframe of unique ESM sessions (by participants and devices)
with threat and challenge means.
""" """
# Select only threat and challenge assessments for events # Select only threat and challenge assessments for events
df_esm_event_threat_challenge = df_esm_sam_clean[ df_esm_event_threat_challenge = df_esm_sam_clean[
@ -112,8 +115,8 @@ def calculate_threat_challenge_means(df_esm_sam_clean: pd.DataFrame) -> pd.DataF
aggfunc="mean", aggfunc="mean",
) )
# Drop unnecessary column values. # Drop unnecessary column values.
df_esm_event_threat_challenge_mean_wide.columns = df_esm_event_threat_challenge_mean_wide.columns.get_level_values( df_esm_event_threat_challenge_mean_wide.columns = (
1 df_esm_event_threat_challenge_mean_wide.columns.get_level_values(1)
) )
df_esm_event_threat_challenge_mean_wide.columns.name = None df_esm_event_threat_challenge_mean_wide.columns.name = None
df_esm_event_threat_challenge_mean_wide.rename( df_esm_event_threat_challenge_mean_wide.rename(
@ -189,10 +192,12 @@ def detect_event_work_related(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
def convert_event_time(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame: def convert_event_time(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
""" """
This function only serves to convert the string datetime answer into a real datetime type. This function only serves to convert the string datetime answer
Errors during this conversion are coerced, meaning that non-datetime answers are assigned Not a Time (NaT). into a real datetime type.
NOTE: Since the only available non-datetime answer to this question was "0 - I do not remember", Errors during this conversion are coerced, meaning that non-datetime answers
the NaTs can be interpreted to mean this. are assigned Not a Time (NaT).
NOTE: Since the only available non-datetime answer to this question was
"0 - I do not remember", the NaTs can be interpreted to mean this.
Parameters Parameters
---------- ----------
@ -208,9 +213,10 @@ def convert_event_time(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
df_esm_sam_clean["questionnaire_id"] == QUESTIONNAIRE_ID_SAM.get("event_time") df_esm_sam_clean["questionnaire_id"] == QUESTIONNAIRE_ID_SAM.get("event_time")
].assign( ].assign(
event_time=lambda x: pd.to_datetime( event_time=lambda x: pd.to_datetime(
x.esm_user_answer, errors="coerce", infer_datetime_format=True, exact=True x.esm_user_answer, errors="coerce", format="Y-m-d H:M:S %z", exact=True
) )
) )
# Example answer: 2020-09-29 00:05:00 +0200, which corresponds to the strptime pattern "%Y-%m-%d %H:%M:%S %z".
return df_esm_event_time return df_esm_event_time
@ -241,9 +247,12 @@ def extract_event_duration(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
== QUESTIONNAIRE_ID_SAM.get("event_duration") == QUESTIONNAIRE_ID_SAM.get("event_duration")
].assign( ].assign(
event_duration=lambda x: pd.to_datetime( event_duration=lambda x: pd.to_datetime(
x.esm_user_answer.str.slice(start=0, stop=-6), errors="coerce" x.esm_user_answer.str.slice(start=0, stop=-6),
errors="coerce",
format="Y-m-d H:M:S",
).dt.time ).dt.time
) )
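# Example answer: 2020-09-29 00:05:00 +0200. The trailing " +0200" (6 characters) is sliced off before parsing, leaving text that matches "%Y-%m-%d %H:%M:%S".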
# TODO Explore the values recorded in event_duration and possibly fix mistakes. # TODO Explore the values recorded in event_duration and possibly fix mistakes.
# For example, participants reported setting 23:50:00 instead of 00:50:00. # For example, participants reported setting 23:50:00 instead of 00:50:00.
@ -251,7 +260,7 @@ def extract_event_duration(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
# we can determine whether: # we can determine whether:
# - this event is still going on ("1 - It is still going on") # - this event is still going on ("1 - It is still going on")
# - the participant couldn't remember its duration ("0 - I do not remember") # - the participant couldn't remember its duration ("0 - I do not remember")
# Generally, these answers were converted to esm_user_answer_numeric in clean_up_esm, # Generally, these answers were converted to esm_user_answer_numeric in clean_up_esm
# but only the numeric types of questions and answers. # but only the numeric types of questions and answers.
# Since this was of "datetime" type, convert these specific answers here again. # Since this was of "datetime" type, convert these specific answers here again.
df_esm_event_duration["event_duration_info"] = np.nan df_esm_event_duration["event_duration_info"] = np.nan
@ -264,4 +273,5 @@ def extract_event_duration(df_esm_sam_clean: pd.DataFrame) -> pd.DataFrame:
return df_esm_event_duration return df_esm_event_duration
# TODO: How many questions about the stressfulness of the period were asked and how does this relate to events? # TODO: How many questions about the stressfulness of the period were asked
# and how does this relate to events?