diff --git a/config.yaml b/config.yaml index a36d9360..a5e5cdf6 100644 --- a/config.yaml +++ b/config.yaml @@ -131,7 +131,8 @@ STEP: SLEEP: DAY_SEGMENTS: *day_segments SLEEP_TYPES: ["main", "nap", "all"] - DAILY_FEATURES_FROM_SUMMARY_DATA: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"] + # Only daily features are extracted from summary data + SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"] WIFI: DAY_SEGMENTS: *day_segments diff --git a/rules/features.snakefile b/rules/features.snakefile index 7f1a1088..7f3ac7f4 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -218,8 +218,8 @@ rule fitbit_sleep_features: sleep_intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv" params: day_segment = "{day_segment}", - sleep_types = config["SLEEP"]["SLEEP_TYPES"], - daily_features_from_summary_data = config["SLEEP"]["DAILY_FEATURES_FROM_SUMMARY_DATA"] + summary_features = config["SLEEP"]["SUMMARY_FEATURES"], + sleep_types = config["SLEEP"]["SLEEP_TYPES"] output: "data/processed/{pid}/fitbit_sleep_{day_segment}.csv" script: diff --git a/src/features/fitbit_sleep/fitbit_sleep_base.py b/src/features/fitbit_sleep/fitbit_sleep_base.py new file mode 100644 index 00000000..fa654b7d --- /dev/null +++ b/src/features/fitbit_sleep/fitbit_sleep_base.py @@ -0,0 +1,70 @@ +import pandas as pd +import itertools + + + +def dailyFeaturesFromSummaryData(sleep_daily_features, sleep_summary_data, summary_features, sleep_type): + if sleep_type == "main": + sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 1] + elif sleep_type == "nap": + sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 0] + elif sleep_type == "all": + pass + else: + raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].") + + features_sum = sleep_summary_data[["minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed", "local_end_date"]].groupby(["local_end_date"]).sum() + features_sum.index.rename("local_date", inplace=True) + if "sumdurationafterwakeup" in summary_features: + sleep_daily_features["sleep_daily_sumdurationafterwakeup" + sleep_type] = features_sum["minutes_after_wakeup"] + if "sumdurationasleep" in summary_features: + sleep_daily_features["sleep_daily_sumdurationasleep" + sleep_type] = features_sum["minutes_asleep"] + if "sumdurationawake" in summary_features: + sleep_daily_features["sleep_daily_sumdurationawake" + sleep_type] = features_sum["minutes_awake"] + if "sumdurationtofallasleep" in summary_features: + sleep_daily_features["sleep_daily_sumdurationtofallasleep" + sleep_type] = features_sum["minutes_to_fall_asleep"] + if "sumdurationinbed" in summary_features: + sleep_daily_features["sleep_daily_sumdurationinbed" + sleep_type] = features_sum["minutes_in_bed"] + + features_avg = sleep_summary_data[["efficiency", "local_end_date"]].groupby(["local_end_date"]).mean() + features_avg.index.rename("local_date", inplace=True) + if "avgefficiency" in summary_features: + sleep_daily_features["sleep_daily_avgefficiency" + sleep_type] = features_avg["efficiency"] + + features_count = sleep_summary_data[["local_start_date_time", "local_end_date"]].groupby(["local_end_date"]).count() + features_count.index.rename("local_date", inplace=True) + if "countepisode" in summary_features: + sleep_daily_features["sleep_daily_countepisode" + sleep_type] = features_count["local_start_date_time"] + + return sleep_daily_features + +def base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type): + if not day_segment == "daily": + return pd.DataFrame(columns=["local_date"]) + else: + # name of the features this function can compute + base_summary_features_names = ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"] + base_sleep_type = ["main", "nap", "all"] + # the subset of requested features this function can compute + summary_features_to_compute = list(set(requested_summary_features) & set(base_summary_features_names)) + sleep_type_to_compute = list(set(requested_sleep_type) & set(base_sleep_type)) + # full names + features_fullnames_to_compute = ["".join(feature) for feature in itertools.product(summary_features_to_compute, sleep_type_to_compute)] + + colnames_can_be_zero = ["sleep_daily_" + x for x in [col for col in features_fullnames_to_compute if "avgefficiency" not in col]] + + if sleep_summary_data.empty: + sleep_summary_features = pd.DataFrame(columns=["local_date"] + ["sleep_daily_" + x for x in features_fullnames_to_compute]) + else: + + sleep_summary_features = pd.DataFrame(columns=["sleep_daily_" + x for x in features_fullnames_to_compute]) + + for sleep_type in sleep_type_to_compute: + sleep_summary_features = dailyFeaturesFromSummaryData(sleep_summary_features, sleep_summary_data, summary_features_to_compute, sleep_type) + + sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0) + + sleep_summary_features = sleep_summary_features.reset_index() + + return sleep_summary_features + diff --git a/src/features/fitbit_sleep_features.py b/src/features/fitbit_sleep_features.py index 7ae3d394..314c13d5 100644 --- a/src/features/fitbit_sleep_features.py +++ b/src/features/fitbit_sleep_features.py @@ -1,67 +1,18 @@ import pandas as pd +from fitbit_sleep.fitbit_sleep_base import base_fitbit_sleep_features import itertools - - -def dailyFeaturesFromSummaryData(sleep_summary_data, sleep_type): - if sleep_type == "main": - sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 1] - elif sleep_type == "nap": - sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 0] - elif sleep_type == "all": - pass - else: - raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].") - - features_sum = sleep_summary_data[["minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed", "local_end_date"]].groupby(["local_end_date"]).sum() - features_sum.index.rename("local_date", inplace=True) - if "sumdurationafterwakeup" in daily_features_from_summary_data: - sleep_daily_features["sleep_daily_sumdurationafterwakeup" + sleep_type] = features_sum["minutes_after_wakeup"] - if "sumdurationasleep" in daily_features_from_summary_data: - sleep_daily_features["sleep_daily_sumdurationasleep" + sleep_type] = features_sum["minutes_asleep"] - if "sumdurationawake" in daily_features_from_summary_data: - sleep_daily_features["sleep_daily_sumdurationawake" + sleep_type] = features_sum["minutes_awake"] - if "sumdurationtofallasleep" in daily_features_from_summary_data: - sleep_daily_features["sleep_daily_sumdurationtofallasleep" + sleep_type] = features_sum["minutes_to_fall_asleep"] - if "sumdurationinbed" in daily_features_from_summary_data: - sleep_daily_features["sleep_daily_sumdurationinbed" + sleep_type] = features_sum["minutes_in_bed"] - - features_avg = sleep_summary_data[["efficiency", "local_end_date"]].groupby(["local_end_date"]).mean() - features_avg.index.rename("local_date", inplace=True) - if "avgefficiency" in daily_features_from_summary_data: - sleep_daily_features["sleep_daily_avgefficiency" + sleep_type] = features_avg["efficiency"] - - features_count = sleep_summary_data[["local_start_date_time", "local_end_date"]].groupby(["local_end_date"]).count() - features_count.index.rename("local_date", inplace=True) - if "countepisode" in daily_features_from_summary_data: - sleep_daily_features["sleep_daily_count" + sleep_type] = features_count["local_start_date_time"] - - return sleep_daily_features - - - sleep_summary_data = pd.read_csv(snakemake.input["sleep_summary_data"]) -sleep_types = snakemake.params["sleep_types"] -daily_features_from_summary_data = snakemake.params["daily_features_from_summary_data"] +requested_summary_features = snakemake.params["summary_features"] +requested_sleep_type = snakemake.params["sleep_types"] day_segment = snakemake.params["day_segment"] +sleep_features = pd.DataFrame(columns=["local_date"]) -daily_features_can_be_zero = list(set(daily_features_from_summary_data) - set(["avgefficiency"])) -colnames_can_be_zero = ["sleep_daily_" + x for x in ["".join(feature) for feature in itertools.product(daily_features_can_be_zero, sleep_types)]] +sleep_features = sleep_features.merge(base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type), on="local_date", how="outer") -colnames = ["sleep_daily_" + x for x in ["".join(feature) for feature in itertools.product(daily_features_from_summary_data, sleep_types)]] +requested_features = ["".join(feature) for feature in itertools.product(requested_summary_features, requested_sleep_type)] if day_segment == "daily" else [] -if sleep_summary_data.empty: - sleep_daily_features = pd.DataFrame(columns=["local_date"] + colnames) -else: - sleep_daily_features = pd.DataFrame(columns=colnames) - for sleep_type in sleep_types: - sleep_daily_features = dailyFeaturesFromSummaryData(sleep_summary_data, sleep_type) +assert len(requested_features) + 1 == sleep_features.shape[1], "The number of features in the output dataframe (=" + str(sleep_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your fitbit sleep feature extraction functions" - sleep_daily_features[colnames_can_be_zero] = sleep_daily_features[colnames_can_be_zero].fillna(0) +sleep_features.to_csv(snakemake.output[0], index=False) - - -if day_segment == "daily": - sleep_daily_features.to_csv(snakemake.output[0]) -else: - pd.DataFrame().to_csv(snakemake.output[0])