rapids/src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py

38 lines
1.9 KiB
Python

import pandas as pd
import numpy as np
def mergeSleepEpisodes(sleep_data, cols_for_groupby):
    """Collapse rows that share the same grouping keys into one episode each.

    Parameters
    ----------
    sleep_data : pandas.DataFrame
        Rows to merge. When it is not empty it must contain every column
        named in ``cols_for_groupby`` plus "timestamp" and "duration".
    cols_for_groupby : str or list of str
        Column name(s) identifying an episode.

    Returns
    -------
    pandas.DataFrame
        One row per group, indexed by ``cols_for_groupby`` (groups keep their
        order of first appearance), with two columns: "timestamp" (the first
        value of the group) and "duration" (the sum over the group).
        An empty ``sleep_data`` yields an empty frame with that same index
        names / columns layout, so downstream code and CSV output see one
        schema regardless of whether there was any data.
    """
    if sleep_data.empty:
        # Mirror the layout produced by the groupby below: grouping keys in
        # the index, "timestamp" and "duration" as the only columns.
        if isinstance(cols_for_groupby, str):
            keys = [cols_for_groupby]
        else:
            keys = list(cols_for_groupby)
        return pd.DataFrame(columns=keys + ["timestamp", "duration"]).set_index(keys)

    # sort=False keeps episodes in the order they first occur in the input
    grouped = sleep_data.groupby(by=cols_for_groupby, sort=False)
    sleep_episodes = grouped[["timestamp"]].first()
    sleep_episodes["duration"] = grouped["duration"].sum()
    return sleep_episodes
# Load the intraday sleep records supplied by Snakemake
sleep_intraday = pd.read_csv(snakemake.input["sleep_intraday"])

# Drop the columns that are not used when building episodes
unused_columns = ["local_timezone", "local_date_time", "local_date", "local_time", "local_hour", "local_minute", "assigned_segments"]
sleep_intraday.drop(columns=unused_columns, inplace=True)

# Derive "unified_level" from "level":
#   "classic" type: 0 = awake {"awake", "restless"}, 1 = asleep {"asleep"}
#   "stages" type:  0 = awake {"wake"},              1 = asleep {"deep", "light", "rem"}
awake_levels = ["awake", "restless", "wake"]
is_awake = sleep_intraday["level"].isin(awake_levels)
sleep_intraday["unified_level"] = np.where(is_awake, 0, 1)

# A new level episode begins on every row whose ("type_episode_id", "level")
# pair differs from the previous row; the running count of those changes
# becomes the episode id shared by each run of consecutive identical rows.
episode_keys = sleep_intraday[["type_episode_id", "level"]]
starts_new_episode = (episode_keys != episode_keys.shift()).any(axis=1)
sleep_intraday.insert(2, "level_episode_id", starts_new_episode.cumsum())

# Merge each run of rows into a single episode
group_columns = ["device_id", "type_episode_id", "level_episode_id", "level", "unified_level", "is_main_sleep", "type"]
sleep_intraday_episodes = mergeSleepEpisodes(sleep_intraday, group_columns)

# Build "start_timestamp" / "end_timestamp".
# NOTE(review): the arithmetic presumably treats "duration" as seconds and
# "timestamp" as milliseconds, ending the episode on its last millisecond.
first_timestamp = sleep_intraday_episodes["timestamp"]
span_before_last_unit = (sleep_intraday_episodes["duration"] - 1) * 1000
sleep_intraday_episodes["end_timestamp"] = first_timestamp + span_before_last_unit + 999
sleep_intraday_episodes = sleep_intraday_episodes.rename(columns={"timestamp": "start_timestamp"})
sleep_intraday_episodes = sleep_intraday_episodes.drop(columns="duration")

# Keep the index in the output: it carries the grouping columns
sleep_intraday_episodes.to_csv(snakemake.output[0], index=True)