diff --git a/features/esm.py b/features/esm.py index e1cbf8b..e40d4f6 100644 --- a/features/esm.py +++ b/features/esm.py @@ -54,7 +54,8 @@ def get_esm_data(usernames: Collection) -> pd.DataFrame: def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame: """ - Convert timestamps into human-readable datetimes and expand the JSON column into several Pandas DF columns. + Convert timestamps into human-readable datetimes and dates + and expand the JSON column into several Pandas DF columns. Parameters ---------- @@ -69,6 +70,11 @@ def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame: df_esm["datetime_lj"] = df_esm["double_esm_user_answer_timestamp"].apply( lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ) ) + df_esm = df_esm.assign( + date_lj=lambda x: (x.datetime_lj - datetime.timedelta(hours=4)).dt.date + ) + # Since daytime EMAs could *theoretically* last beyond midnight, but never after 4 AM, + # the datetime is first shifted 4 h earlier, so answers given before 4 AM get the previous day's date. df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop( columns=["esm_trigger"] ) # The esm_trigger column is already present in the main df.