Refactor fitbit sleep features
parent
dcc7ca14e3
commit
e133b8b530
|
@ -131,7 +131,8 @@ STEP:
|
|||
SLEEP:
|
||||
DAY_SEGMENTS: *day_segments
|
||||
SLEEP_TYPES: ["main", "nap", "all"]
|
||||
DAILY_FEATURES_FROM_SUMMARY_DATA: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
|
||||
# Only daily features are extracted from summary data
|
||||
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
|
||||
|
||||
WIFI:
|
||||
DAY_SEGMENTS: *day_segments
|
||||
|
|
|
@ -218,8 +218,8 @@ rule fitbit_sleep_features:
|
|||
sleep_intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
|
||||
params:
|
||||
day_segment = "{day_segment}",
|
||||
sleep_types = config["SLEEP"]["SLEEP_TYPES"],
|
||||
daily_features_from_summary_data = config["SLEEP"]["DAILY_FEATURES_FROM_SUMMARY_DATA"]
|
||||
summary_features = config["SLEEP"]["SUMMARY_FEATURES"],
|
||||
sleep_types = config["SLEEP"]["SLEEP_TYPES"]
|
||||
output:
|
||||
"data/processed/{pid}/fitbit_sleep_{day_segment}.csv"
|
||||
script:
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
import pandas as pd
|
||||
import itertools
|
||||
|
||||
|
||||
|
||||
def dailyFeaturesFromSummaryData(sleep_daily_features, sleep_summary_data, summary_features, sleep_type):
    """Add the daily features of one sleep type to ``sleep_daily_features``.

    Sleep episodes are filtered by ``sleep_type`` using the ``is_main_sleep``
    column, grouped by ``local_end_date`` and aggregated. Each requested
    feature is stored in a column named
    ``"sleep_daily_" + feature + sleep_type``.

    Parameters
    ----------
    sleep_daily_features : pandas.DataFrame
        Frame that accumulates the features. Presumably either empty or
        already indexed by date from a previous call -- verify against caller.
    sleep_summary_data : pandas.DataFrame
        One row per sleep episode. Must contain ``local_end_date``,
        ``is_main_sleep`` (for "main"/"nap") and the source column of every
        requested feature.
    summary_features : collection of str
        Feature names to compute; names not handled here are ignored.
    sleep_type : str
        One of "main", "nap" or "all".

    Returns
    -------
    pandas.DataFrame
        The features frame, indexed by ``local_date``. Callers must use the
        returned frame; the input frame is not guaranteed to be updated.

    Raises
    ------
    ValueError
        If ``sleep_type`` is not "main", "nap" or "all".
    """
    if sleep_type == "main":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 1]
    elif sleep_type == "nap":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 0]
    elif sleep_type != "all":
        raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].")

    # feature name -> column of the summary data that is summed per day
    summed_sources = {
        "sumdurationafterwakeup": "minutes_after_wakeup",
        "sumdurationasleep": "minutes_asleep",
        "sumdurationawake": "minutes_awake",
        "sumdurationtofallasleep": "minutes_to_fall_asleep",
        "sumdurationinbed": "minutes_in_bed",
    }

    episodes_by_day = sleep_summary_data.groupby("local_end_date")
    computed = {}
    for feature, source in summed_sources.items():
        if feature in summary_features:
            computed["sleep_daily_" + feature + sleep_type] = episodes_by_day[source].sum()
    if "avgefficiency" in summary_features:
        computed["sleep_daily_avgefficiency" + sleep_type] = episodes_by_day["efficiency"].mean()
    if "countepisode" in summary_features:
        computed["sleep_daily_countepisode" + sleep_type] = episodes_by_day["local_start_date_time"].count()

    if not computed:
        return sleep_daily_features

    new_features = pd.DataFrame(computed)

    # Plain column assignment aligns on the frame's existing index, so days
    # that only occur for this sleep type (e.g. a nap on a day without a main
    # sleep) would be dropped. Grow the index to the union of dates first.
    existing_dates = sleep_daily_features.index
    if existing_dates.empty:
        all_dates = new_features.index
    else:
        all_dates = existing_dates.union(new_features.index)
    all_dates = all_dates.rename("local_date")

    sleep_daily_features = sleep_daily_features.reindex(all_dates)
    for column in new_features.columns:
        sleep_daily_features[column] = new_features[column]

    return sleep_daily_features
|
||||
|
||||
def base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type):
    """Compute the daily Fitbit sleep features that were requested.

    Parameters
    ----------
    sleep_summary_data : pandas.DataFrame
        Sleep summary data, one row per sleep episode.
    day_segment : str
        Features are only computed for the "daily" segment.
    requested_summary_features : iterable of str
        Requested feature names; names this function cannot compute are
        ignored.
    requested_sleep_type : iterable of str
        Requested sleep types; values other than "main", "nap" and "all" are
        ignored.

    Returns
    -------
    pandas.DataFrame
        For a non-daily segment, an empty frame with only a ``local_date``
        column. Otherwise a frame with ``local_date`` plus one
        ``"sleep_daily_" + feature + sleep_type`` column per computable
        combination, in the order the features and sleep types were
        requested. The frame has no rows when ``sleep_summary_data`` is empty.
    """
    if day_segment != "daily":
        return pd.DataFrame(columns=["local_date"])

    # name of the features this function can compute
    base_summary_features_names = ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
    base_sleep_type = ["main", "nap", "all"]

    # The subset of the request this function can compute. Filtering in
    # request order (dict.fromkeys drops duplicates but keeps order) makes the
    # output column order reproducible; a set intersection would leave it
    # dependent on string hashing.
    summary_features_to_compute = [feature for feature in dict.fromkeys(requested_summary_features) if feature in base_summary_features_names]
    sleep_type_to_compute = [sleep_type for sleep_type in dict.fromkeys(requested_sleep_type) if sleep_type in base_sleep_type]

    # full column names
    feature_columns = ["sleep_daily_" + feature + sleep_type for feature, sleep_type in itertools.product(summary_features_to_compute, sleep_type_to_compute)]
    # an average stays missing on days without episodes; sums and counts become 0
    colnames_can_be_zero = [column for column in feature_columns if "avgefficiency" not in column]

    if sleep_summary_data.empty:
        return pd.DataFrame(columns=["local_date"] + feature_columns)

    sleep_summary_features = pd.DataFrame(columns=feature_columns)
    for sleep_type in sleep_type_to_compute:
        sleep_summary_features = dailyFeaturesFromSummaryData(sleep_summary_features, sleep_summary_data, summary_features_to_compute, sleep_type)

    sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0)

    # move the date index into a regular column
    return sleep_summary_features.reset_index()
|
||||
|
|
@ -1,67 +1,18 @@
|
|||
import pandas as pd
|
||||
from fitbit_sleep.fitbit_sleep_base import base_fitbit_sleep_features
|
||||
import itertools
|
||||
|
||||
|
||||
|
||||
def dailyFeaturesFromSummaryData(sleep_summary_data, sleep_type):
    """Add the daily features of one sleep type to the module-level frame.

    Reads the module-level names ``daily_features_from_summary_data`` (the
    requested feature names) and ``sleep_daily_features`` (the frame being
    filled), both of which must be defined before this function is called.
    The frame is modified in place and also returned.

    Parameters
    ----------
    sleep_summary_data : pandas.DataFrame
        One row per sleep episode; episodes are grouped by ``local_end_date``.
    sleep_type : str
        One of "main", "nap" or "all"; selects episodes via ``is_main_sleep``.

    Returns
    -------
    pandas.DataFrame
        The module-level ``sleep_daily_features`` frame.

    Raises
    ------
    ValueError
        If ``sleep_type`` is not "main", "nap" or "all".
    """
    if sleep_type == "main":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 1]
    elif sleep_type == "nap":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 0]
    elif sleep_type == "all":
        pass
    else:
        raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].")

    # per-day totals of the duration columns
    features_sum = sleep_summary_data[["minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed", "local_end_date"]].groupby(["local_end_date"]).sum()
    features_sum.index.rename("local_date", inplace=True)
    if "sumdurationafterwakeup" in daily_features_from_summary_data:
        sleep_daily_features["sleep_daily_sumdurationafterwakeup" + sleep_type] = features_sum["minutes_after_wakeup"]
    if "sumdurationasleep" in daily_features_from_summary_data:
        sleep_daily_features["sleep_daily_sumdurationasleep" + sleep_type] = features_sum["minutes_asleep"]
    if "sumdurationawake" in daily_features_from_summary_data:
        sleep_daily_features["sleep_daily_sumdurationawake" + sleep_type] = features_sum["minutes_awake"]
    if "sumdurationtofallasleep" in daily_features_from_summary_data:
        sleep_daily_features["sleep_daily_sumdurationtofallasleep" + sleep_type] = features_sum["minutes_to_fall_asleep"]
    if "sumdurationinbed" in daily_features_from_summary_data:
        sleep_daily_features["sleep_daily_sumdurationinbed" + sleep_type] = features_sum["minutes_in_bed"]

    # per-day mean efficiency
    features_avg = sleep_summary_data[["efficiency", "local_end_date"]].groupby(["local_end_date"]).mean()
    features_avg.index.rename("local_date", inplace=True)
    if "avgefficiency" in daily_features_from_summary_data:
        sleep_daily_features["sleep_daily_avgefficiency" + sleep_type] = features_avg["efficiency"]

    # per-day number of episodes
    features_count = sleep_summary_data[["local_start_date_time", "local_end_date"]].groupby(["local_end_date"]).count()
    features_count.index.rename("local_date", inplace=True)
    if "countepisode" in daily_features_from_summary_data:
        # The column must carry the full feature name ("countepisode") so it
        # matches the "sleep_daily_" + feature + sleep_type naming used for
        # every other feature and for the expected output columns.
        sleep_daily_features["sleep_daily_countepisode" + sleep_type] = features_count["local_start_date_time"]

    return sleep_daily_features
|
||||
|
||||
|
||||
|
||||
sleep_summary_data = pd.read_csv(snakemake.input["sleep_summary_data"])
|
||||
sleep_types = snakemake.params["sleep_types"]
|
||||
daily_features_from_summary_data = snakemake.params["daily_features_from_summary_data"]
|
||||
requested_summary_features = snakemake.params["summary_features"]
|
||||
requested_sleep_type = snakemake.params["sleep_types"]
|
||||
day_segment = snakemake.params["day_segment"]
|
||||
sleep_features = pd.DataFrame(columns=["local_date"])
|
||||
|
||||
daily_features_can_be_zero = list(set(daily_features_from_summary_data) - set(["avgefficiency"]))
|
||||
colnames_can_be_zero = ["sleep_daily_" + x for x in ["".join(feature) for feature in itertools.product(daily_features_can_be_zero, sleep_types)]]
|
||||
sleep_features = sleep_features.merge(base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type), on="local_date", how="outer")
|
||||
|
||||
colnames = ["sleep_daily_" + x for x in ["".join(feature) for feature in itertools.product(daily_features_from_summary_data, sleep_types)]]
|
||||
requested_features = ["".join(feature) for feature in itertools.product(requested_summary_features, requested_sleep_type)] if day_segment == "daily" else []
|
||||
|
||||
if sleep_summary_data.empty:
|
||||
sleep_daily_features = pd.DataFrame(columns=["local_date"] + colnames)
|
||||
else:
|
||||
sleep_daily_features = pd.DataFrame(columns=colnames)
|
||||
for sleep_type in sleep_types:
|
||||
sleep_daily_features = dailyFeaturesFromSummaryData(sleep_summary_data, sleep_type)
|
||||
assert len(requested_features) + 1 == sleep_features.shape[1], "The number of features in the output dataframe (=" + str(sleep_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your fitbit sleep feature extraction functions"
|
||||
|
||||
sleep_daily_features[colnames_can_be_zero] = sleep_daily_features[colnames_can_be_zero].fillna(0)
|
||||
sleep_features.to_csv(snakemake.output[0], index=False)
|
||||
|
||||
|
||||
|
||||
if day_segment == "daily":
|
||||
sleep_daily_features.to_csv(snakemake.output[0])
|
||||
else:
|
||||
pd.DataFrame().to_csv(snakemake.output[0])
|
||||
|
|
Loading…
Reference in New Issue