diff --git a/features/esm.py b/features/esm.py index 67635b8..ca3a936 100644 --- a/features/esm.py +++ b/features/esm.py @@ -1,14 +1,13 @@ import datetime from collections.abc import Collection +import helper import numpy as np import pandas as pd -from pytz import timezone from config.models import ESM, Participant from setup import db_engine, session -TZ_LJ = timezone("Europe/Ljubljana") ESM_STATUS_ANSWERED = 2 GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"] @@ -67,14 +66,8 @@ def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame: df_esm_preprocessed: pd.DataFrame A dataframe with added columns: datetime in Ljubljana timezone and all fields from ESM_JSON column. """ - df_esm["datetime_lj"] = df_esm["double_esm_user_answer_timestamp"].apply( - lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ) - ) - df_esm = df_esm.assign( - date_lj=lambda x: (x.datetime_lj - datetime.timedelta(hours=4)).dt.date - ) - # Since daytime EMAs could *theoretically* last beyond midnight, but never after 4 AM, - # the datetime is first translated to 4 h earlier. + df_esm = helper.get_date_from_timestamp(df_esm) + df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop( columns=["esm_trigger"] ) # The esm_trigger column is already present in the main df. 
"""Helper functions for the modules in ``features``."""

import datetime

import pandas as pd
from pytz import timezone

TZ_LJ = timezone("Europe/Ljubljana")

# Daytime EMAs could *theoretically* last beyond midnight, but never after 4 AM,
# so a datetime is first translated this much earlier before its date is taken.
DAY_CUTOFF = datetime.timedelta(hours=4)


def get_date_from_timestamp(
    df_aware: pd.DataFrame,
    timestamp_column: str = "double_esm_user_answer_timestamp",
) -> pd.DataFrame:
    """
    Add a Ljubljana-local datetime column and a 4 AM-based date column.

    Parameters
    ----------
    df_aware: pd.DataFrame
        A dataframe containing a column of Unix timestamps in milliseconds.
    timestamp_column: str
        The name of that column. Defaults to the ESM user answer timestamp.

    Returns
    -------
    df_datetime: pd.DataFrame
        A new dataframe with two added (or overwritten) columns:
        datetime_lj, the timestamp as a timezone-aware datetime in TZ_LJ, and
        date_lj, the calendar date of datetime_lj shifted back by DAY_CUTOFF,
        so that anything before 4 AM local time counts towards the previous day.
        The dataframe passed in is left unmodified.

    Notes
    -----
    The conversion is vectorized, so an empty dataframe is handled
    and missing timestamps yield NaT instead of raising.
    """
    # Milliseconds since the epoch are UTC by definition; localize first,
    # then convert, so that DST transitions are resolved by the timezone.
    datetime_lj = pd.to_datetime(
        df_aware[timestamp_column], unit="ms", utc=True
    ).dt.tz_convert(TZ_LJ)

    # Both columns are added through assign() so the caller's frame is never
    # written to; the subtraction is absolute-time arithmetic on aware values.
    return df_aware.assign(
        datetime_lj=datetime_lj,
        date_lj=(datetime_lj - DAY_CUTOFF).dt.date,
    )