From 98f1df81c69067b583525353f7b629a8bf7e8327 Mon Sep 17 00:00:00 2001 From: junos Date: Wed, 11 Aug 2021 17:26:44 +0200 Subject: [PATCH] Use the same function for ESM and other data. --- exploration/ex_ml_pipeline.py | 3 ++- features/esm.py | 3 +-- features/helper.py | 24 +++++++++++++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/exploration/ex_ml_pipeline.py b/exploration/ex_ml_pipeline.py index b613a81..3f5c85f 100644 --- a/exploration/ex_ml_pipeline.py +++ b/exploration/ex_ml_pipeline.py @@ -27,7 +27,7 @@ if nb_dir not in sys.path: # %% import participants.query_db -from features import esm, proximity +from features import esm, helper, proximity # %% [markdown] # # 1. Get the relevant data @@ -58,6 +58,7 @@ df_esm_PANAS_clean = esm.clean_up_esm(df_esm_PANAS) # %% df_proximity = proximity.get_proximity_data(ptcp_2) +df_proximity = helper.get_date_from_timestamp(df_proximity) df_proximity = proximity.recode_proximity(df_proximity) # %% [markdown] diff --git a/features/esm.py b/features/esm.py index ca3a936..066542b 100644 --- a/features/esm.py +++ b/features/esm.py @@ -1,11 +1,10 @@ -import datetime from collections.abc import Collection -import helper import numpy as np import pandas as pd from config.models import ESM, Participant +from features import helper from setup import db_engine, session ESM_STATUS_ANSWERED = 2 diff --git a/features/helper.py b/features/helper.py index 23c4569..b20074f 100644 --- a/features/helper.py +++ b/features/helper.py @@ -4,10 +4,32 @@ import pandas as pd from pytz import timezone TZ_LJ = timezone("Europe/Ljubljana") +COLUMN_TIMESTAMP = "timestamp" +COLUMN_TIMESTAMP_ESM = "double_esm_user_answer_timestamp" def get_date_from_timestamp(df_aware) -> pd.DataFrame: - df_aware["datetime_lj"] = df_aware["double_esm_user_answer_timestamp"].apply( + """ + Transform a UNIX timestamp into a datetime (with Ljubljana timezone). + Additionally, extract only the date part, where anything until 4 AM is considered the same day. + + Parameters + ---------- + df_aware: pd.DataFrame + Any AWARE-type data as defined in models.py. + + Returns + ------- + df_aware: pd.DataFrame + The same dataframe with datetime_lj and date_lj columns added. + + """ + if COLUMN_TIMESTAMP_ESM in df_aware: + column_timestamp = COLUMN_TIMESTAMP_ESM + else: + column_timestamp = COLUMN_TIMESTAMP + + df_aware["datetime_lj"] = df_aware[column_timestamp].apply( lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ) ) df_aware = df_aware.assign(