stress_at_work_analysis/features/helper.py

42 lines
1.2 KiB
Python

import datetime
import pandas as pd
from pytz import timezone
TZ_LJ = timezone("Europe/Ljubljana")
COLUMN_TIMESTAMP = "timestamp"
COLUMN_TIMESTAMP_ESM = "double_esm_user_answer_timestamp"
def get_date_from_timestamp(df_aware) -> pd.DataFrame:
"""
Transform a UNIX timestamp into a datetime (with Ljubljana timezone).
Additionally, extract only the date part, where anything until 4 AM is considered the same day.
Parameters
----------
df_aware: pd.DataFrame
Any AWARE-type data as defined in models.py.
Returns
-------
df_aware: pd.DataFrame
The same dataframe with datetime_lj and date_lj columns added.
"""
if COLUMN_TIMESTAMP_ESM in df_aware:
column_timestamp = COLUMN_TIMESTAMP_ESM
else:
column_timestamp = COLUMN_TIMESTAMP
df_aware["datetime_lj"] = df_aware[column_timestamp].apply(
lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ)
)
df_aware = df_aware.assign(
date_lj=lambda x: (x.datetime_lj - datetime.timedelta(hours=4)).dt.date
)
# Since daytime EMAs could *theoretically* last beyond midnight, but never after 4 AM,
# the datetime is first translated to 4 h earlier.
return df_aware