Move datetime calculation to a separate function.

communication
junos 2021-08-11 17:19:14 +02:00
parent 070cfdba80
commit ad85f79bc5
2 changed files with 22 additions and 10 deletions

View File

@ -1,14 +1,13 @@
import datetime import datetime
from collections.abc import Collection from collections.abc import Collection
import helper
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from pytz import timezone
from config.models import ESM, Participant from config.models import ESM, Participant
from setup import db_engine, session from setup import db_engine, session
TZ_LJ = timezone("Europe/Ljubljana")
ESM_STATUS_ANSWERED = 2 ESM_STATUS_ANSWERED = 2
GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"] GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]
@ -67,14 +66,8 @@ def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
df_esm_preprocessed: pd.DataFrame df_esm_preprocessed: pd.DataFrame
A dataframe with added columns: datetime in Ljubljana timezone and all fields from ESM_JSON column. A dataframe with added columns: datetime in Ljubljana timezone and all fields from ESM_JSON column.
""" """
df_esm["datetime_lj"] = df_esm["double_esm_user_answer_timestamp"].apply( df_esm = helper.get_date_from_timestamp(df_esm)
lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ)
)
df_esm = df_esm.assign(
date_lj=lambda x: (x.datetime_lj - datetime.timedelta(hours=4)).dt.date
)
# Since daytime EMAs could *theoretically* last beyond midnight, but never after 4 AM,
# the datetime is first translated to 4 h earlier.
df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop( df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop(
columns=["esm_trigger"] columns=["esm_trigger"]
) # The esm_trigger column is already present in the main df. ) # The esm_trigger column is already present in the main df.

19
features/helper.py 100644
View File

@ -0,0 +1,19 @@
import datetime
import pandas as pd
from pytz import timezone
# Time zone used to localize timestamps for the `datetime_lj` / `date_lj` columns.
TZ_LJ = timezone("Europe/Ljubljana")
def get_date_from_timestamp(
    df_aware: pd.DataFrame,
    timestamp_column: str = "double_esm_user_answer_timestamp",
) -> pd.DataFrame:
    """
    Add a local datetime and a "study day" date derived from a millisecond timestamp.

    Parameters
    ----------
    df_aware: pd.DataFrame
        A dataframe containing a column of Unix timestamps expressed in milliseconds.
    timestamp_column: str
        The name of the column holding the timestamps.
        Defaults to the ESM answer timestamp column.

    Returns
    -------
    df_aware: pd.DataFrame
        A new dataframe with two added columns: datetime_lj, the timestamp as
        a timezone-aware datetime in TZ_LJ, and date_lj, the calendar date of
        that datetime shifted 4 hours back. The dataframe passed in is left unmodified.
    """
    # Vectorized conversion: interpret the values as milliseconds since the epoch (UTC)
    # and express them in the local zone. Unlike a row-wise apply, this also yields
    # a proper datetime column for an empty dataframe, so the .dt access below is safe.
    datetime_lj = pd.to_datetime(
        df_aware[timestamp_column], unit="ms", utc=True
    ).dt.tz_convert(TZ_LJ)
    # Since daytime EMAs could *theoretically* last beyond midnight, but never after 4 AM,
    # the datetime is first translated to 4 h earlier.
    date_lj = (datetime_lj - datetime.timedelta(hours=4)).dt.date
    # assign() returns a copy, so both columns are added without touching the caller's frame.
    return df_aware.assign(datetime_lj=datetime_lj, date_lj=date_lj)