Update sleep summary features for segments

pull/103/head
Meng Li 2020-11-23 12:01:00 -05:00
parent 10384204a1
commit deba6b9e4f
9 changed files with 241 additions and 251 deletions

View File

@ -147,10 +147,6 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_CALORIES']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_CALORIES"]["TABLE_FORMAT"])
if config["FITBIT_SLEEP"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_SLEEP']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_SLEEP"]["TABLE_FORMAT"])
for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys():
if config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_raw.csv", pid=config["PIDS"]))
@ -167,6 +163,20 @@ for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_intraday.csv", pid=config["PIDS"]))
for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"]))
# for provider in config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"].keys():
# if config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_raw.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_parsed.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys():
if config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_summary_raw.csv", pid=config["PIDS"]))
@ -189,13 +199,6 @@ for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
for provider in config["FITBIT_SLEEP"]["PROVIDERS"].keys():
if config["FITBIT_SLEEP"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_episodes.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
# visualization for data exploration
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))

View File

@ -300,18 +300,16 @@ FITBIT_STEPS_INTRADAY:
SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_intraday
SRC_LANGUAGE: "python"
FITBIT_SLEEP:
TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES
TABLE:
JSON: fitbit_sleep
CSV:
SUMMARY: sleep_summary
INTRADAY: sleep_intraday
FITBIT_SLEEP_SUMMARY:
TABLE: sleep_summary
SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
SLEEP_TYPES: ["main", "nap", "all"]
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_summary
SRC_LANGUAGE: "python"
FITBIT_CALORIES:
TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES

View File

@ -476,6 +476,32 @@ rule fitbit_steps_intraday_r_features:
script:
"../src/features/entry.R"
# Computes Fitbit sleep summary features for one participant ({pid}) with a Python provider.
# The provider settings are read from config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"], keyed by
# the upper-cased {provider_key} wildcard; the work itself is delegated to src/features/entry.py.
rule fitbit_sleep_summary_python_features:
input:
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "fitbit_sleep_summary"
output:
"data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_python_{provider_key}.csv"
script:
"../src/features/entry.py"
# R counterpart of the Python sleep summary feature rule: same inputs and params, but the
# output file name carries "_r_" and the work is delegated to src/features/entry.R.
rule fitbit_sleep_summary_r_features:
input:
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "fitbit_sleep_summary"
output:
"data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_r_{provider_key}.csv"
script:
"../src/features/entry.R"
# rule fitbit_sleep_features:
# input:
# sleep_summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",

View File

@ -206,6 +206,20 @@ rule fitbit_parse_steps:
script:
"../src/data/fitbit_parse_steps.py"
# Parses one raw Fitbit sleep file per participant and data type ({fitbit_data_type} wildcard).
# The table name comes from config["FITBIT_SLEEP_<TYPE>"]["TABLE"], where <TYPE> is the
# upper-cased wildcard value, so a matching config section must exist for every data type used.
# NOTE(review): this rule declares a single positional input; confirm the parsing script
# only reads snakemake.input[0].
rule fitbit_parse_sleep:
input:
"data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv"
params:
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
table = lambda wildcards: config["FITBIT_SLEEP_"+str(wildcards.fitbit_data_type).upper()]["TABLE"],
column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"],
fitbit_data_type = "{fitbit_data_type}",
sleep_episode_timestamp = config["FITBIT_SLEEP_SUMMARY"]["SLEEP_EPISODE_TIMESTAMP"]
output:
"data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed.csv"
script:
"../src/data/fitbit_parse_sleep.py"
rule fitbit_parse_calories:
input:
data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
@ -219,19 +233,6 @@ rule fitbit_parse_calories:
script:
"../src/data/fitbit_parse_calories.py"
# Variant of the sleep parsing rule driven by config["FITBIT_SLEEP"]["TABLE_FORMAT"]:
# one "json" raw file when the format is JSON, otherwise separate "summary" and "intraday"
# raw files. It always produces both a summary-episodes file and an intraday file.
rule fitbit_parse_sleep:
input:
data = expand("data/raw/{{pid}}/fitbit_sleep_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
params:
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
table = config["FITBIT_SLEEP"]["TABLE"],
table_format = config["FITBIT_SLEEP"]["TABLE_FORMAT"]
output:
summary_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_episodes.csv",
intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_parsed.csv"
script:
"../src/data/fitbit_parse_sleep.py"
rule fitbit_readable_datetime:
input:
sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv",

View File

@ -1,9 +1,8 @@
import json
import pandas as pd
from datetime import datetime
import numpy as np
from datetime import datetime, timedelta
import dateutil.parser
from datetime import timedelta
SLEEP_CODE2LEVEL = ["asleep", "restless", "awake"]
@ -12,7 +11,7 @@ SLEEP_SUMMARY_COLUMNS_V1_2 = ("device_id", "efficiency",
"minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
"is_main_sleep", "type",
"local_start_date_time", "local_end_date_time",
"start_timestamp", "end_timestamp")
"timestamp")
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless")
SLEEP_INTRADAY_COLUMNS = ("device_id",
@ -71,29 +70,30 @@ def classicData1min(data_summary):
newRow = {'dateTime':dateutil.parser.parse(origEntry['dateTime'])+timedelta(seconds=counter*timeDuration),'level':origEntry['level'],'seconds':timeDuration}
dataList.append(newRow)
counter = counter + 1
# print(dataList)
return dataList
# Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday):
# Summary data
# Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
sleep_record_type = "classic"
d_start_datetime = datetime.strptime(record["startTime"][:18], "%Y-%m-%dT%H:%M:%S")
d_end_datetime = datetime.strptime(record["endTime"][:18], "%Y-%m-%dT%H:%M:%S")
# Summary data
if fitbit_data_type == "summary":
row_summary = (device_id, record["efficiency"],
record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
d_is_main_sleep, sleep_record_type,
d_start_datetime, d_end_datetime,
d_start_datetime.date(), d_end_datetime.date(),
0,0,
0,
record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
record["restlessCount"], record["restlessDuration"])
records_summary.append(row_summary)
# Intraday data
if fitbit_data_type == "intraday":
start_date = d_start_datetime.date()
end_date = d_end_datetime.date()
is_before_midnight = True
@ -119,14 +119,15 @@ def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, rec
return records_summary, records_intraday
# Parse one record for sleep API version 1.2
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday):
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
# Summary data
sleep_record_type = record['type']
d_start_datetime = datetime.strptime(record["startTime"][:18], "%Y-%m-%dT%H:%M:%S")
d_end_datetime = datetime.strptime(record["endTime"][:18], "%Y-%m-%dT%H:%M:%S")
# Summary data
if fitbit_data_type == "summary":
row_summary = (device_id, record["efficiency"],
record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
d_is_main_sleep, sleep_record_type,
@ -134,8 +135,10 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
0,0)
records_summary.append(row_summary)
if sleep_record_type == 'classic':
# Intraday data
if fitbit_data_type == "intraday":
if sleep_record_type == 'classic':
start_date = d_start_datetime.date()
end_date = d_end_datetime.date()
is_before_midnight = True
@ -156,7 +159,7 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
d_datetime, 0)
records_intraday.append(row_intraday)
else:
## for sleep type "stages"
# For sleep type "stages"
start_date = d_start_datetime.date()
end_date = d_end_datetime.date()
is_before_midnight = True
@ -182,7 +185,7 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
def parseSleepData(sleep_data):
def parseSleepData(sleep_data, fitbit_data_type):
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
if sleep_data.empty:
return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)
@ -197,32 +200,54 @@ def parseSleepData(sleep_data):
# For sleep API version 1
if "awakeCount" in record:
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1
records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday)
records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
# For sleep API version 1.2
else:
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday)
records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
if fitbit_data_type == "summary":
parsed_data = pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS)
elif fitbit_data_type == "intraday":
parsed_data = pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
else:
raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")
return parsed_data
return pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
table_format = snakemake.params["table_format"]
timezone = snakemake.params["timezone"]
column_format = snakemake.params["column_format"]
fitbit_data_type = snakemake.params["fitbit_data_type"]
sleep_episode_timestamp = snakemake.params["sleep_episode_timestamp"]
if table_format == "JSON":
if column_format == "JSON":
json_raw = pd.read_csv(snakemake.input[0])
summary, intraday = parseSleepData(json_raw)
elif table_format == "CSV":
summary = pd.read_csv(snakemake.input[0], parse_dates=["local_start_date_time", "local_end_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
intraday = pd.read_csv(snakemake.input[1], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
parsed_data = parseSleepData(json_raw, fitbit_data_type)
elif column_format == "PLAIN_TEXT":
if fitbit_data_type == "summary":
parsed_data = pd.read_csv(snakemake.input[0], parse_dates=["local_start_date_time", "local_end_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
elif fitbit_data_type == "intraday":
parsed_data = pd.read_csv(snakemake.input[1], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
else:
raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")
else:
raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].")
if summary.shape[0] > 0:
summary["start_timestamp"] = summary["local_start_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
summary["end_timestamp"] = summary["local_end_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
if intraday.shape[0] > 0:
intraday["timestamp"] = intraday["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
if parsed_data.shape[0] > 0 and fitbit_data_type == "summary":
if sleep_episode_timestamp == "start":
parsed_data["timestamp"] = parsed_data["local_start_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
elif sleep_episode_timestamp == "end":
parsed_data["timestamp"] = parsed_data["local_end_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
else:
raise ValueError("SLEEP_EPISODE_TIMESTAMP can only be one of ['start', 'end'].")
# Drop useless columns: local_start_date_time and local_end_date_time
parsed_data.drop(["local_start_date_time", "local_end_date_time"], axis = 1, inplace=True)
if parsed_data.shape[0] > 0 and fitbit_data_type == "intraday":
parsed_data["timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
# Unifying level
intraday["unified_level"] = np.where(intraday["level"].isin(["awake", "wake", "restless"]), 0, 1)
parsed_data["unified_level"] = np.where(parsed_data["level"].isin(["awake", "wake", "restless"]), 0, 1)
summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)
parsed_data.to_csv(snakemake.output[0], index=False)

View File

@ -1,70 +0,0 @@
import pandas as pd
import itertools
def dailyFeaturesFromSummaryData(sleep_daily_features, sleep_summary_data, summary_features, sleep_type):
    """Outer-join daily sleep summary aggregates for one sleep type onto a feature table.

    Args:
        sleep_daily_features: DataFrame the new feature columns are joined onto (by index).
        sleep_summary_data: DataFrame of sleep episodes; the columns read here are
            "is_main_sleep", "efficiency", the five "minutes_*" columns,
            "local_start_date_time" and "local_end_date".
        summary_features: container of feature names to add.
        sleep_type: "main" (is_main_sleep == 1), "nap" (is_main_sleep == 0) or "all".

    Returns:
        A new DataFrame with one "sleep_daily_<feature><sleep_type>" column per
        requested feature, grouped by the episode's "local_end_date".

    Raises:
        ValueError: if sleep_type is not one of the three supported values.
    """
    if sleep_type not in ("main", "nap", "all"):
        raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].")
    if sleep_type != "all":
        main_flag = 1 if sleep_type == "main" else 0
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == main_flag]

    # (aggregation, source columns, [(feature name, source column), ...]) in output order.
    aggregation_plan = (
        ("sum",
         ["minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed"],
         [("sumdurationafterwakeup", "minutes_after_wakeup"),
          ("sumdurationasleep", "minutes_asleep"),
          ("sumdurationawake", "minutes_awake"),
          ("sumdurationtofallasleep", "minutes_to_fall_asleep"),
          ("sumdurationinbed", "minutes_in_bed")]),
        ("mean",
         ["efficiency"],
         [("avgefficiency", "efficiency")]),
        ("count",
         ["local_start_date_time"],
         [("countepisode", "local_start_date_time")]),
    )

    for aggregate, value_columns, outputs in aggregation_plan:
        grouped = sleep_summary_data[value_columns + ["local_end_date"]].groupby(["local_end_date"])
        per_day = getattr(grouped, aggregate)()
        per_day.index.rename("local_date", inplace=True)
        for feature, column in outputs:
            if feature not in summary_features:
                continue
            joined = sleep_daily_features.join(per_day[[column]], how="outer")
            sleep_daily_features = joined.rename(columns={column: "sleep_daily_" + feature + sleep_type})

    return sleep_daily_features
def base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type):
    """Compute daily Fitbit sleep summary features.

    Args:
        sleep_summary_data: DataFrame of parsed sleep summary episodes.
        day_segment: only "daily" is supported; any other value yields an
            empty frame with a single "local_date" column.
        requested_summary_features: feature names requested by the caller.
        requested_sleep_type: sleep types requested by the caller.

    Returns:
        DataFrame with a "local_date" column plus one
        "sleep_daily_<feature><sleep_type>" column per supported combination.
    """
    if day_segment != "daily":
        return pd.DataFrame(columns=["local_date"])

    # name of the features this function can compute
    base_summary_features_names = ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
    base_sleep_type = ["main", "nap", "all"]

    # The subset of requested names this function can compute. Filtering the base
    # lists (instead of intersecting sets) keeps the order, and therefore the
    # output column order, identical from one run to the next.
    summary_features_to_compute = [name for name in base_summary_features_names if name in requested_summary_features]
    sleep_type_to_compute = [name for name in base_sleep_type if name in requested_sleep_type]

    # full names
    features_fullnames_to_compute = ["".join(feature) for feature in itertools.product(summary_features_to_compute, sleep_type_to_compute)]
    # Every feature except the average efficiency is filled with 0 when missing.
    colnames_can_be_zero = ["sleep_daily_" + col for col in features_fullnames_to_compute if "avgefficiency" not in col]

    if sleep_summary_data.empty:
        return pd.DataFrame(columns=["local_date"] + ["sleep_daily_" + x for x in features_fullnames_to_compute])

    sleep_summary_features = pd.DataFrame()
    for sleep_type in sleep_type_to_compute:
        sleep_summary_features = dailyFeaturesFromSummaryData(sleep_summary_features, sleep_summary_data, summary_features_to_compute, sleep_type)

    sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0)
    sleep_summary_features = sleep_summary_features.reset_index()

    return sleep_summary_features

View File

@ -1,18 +0,0 @@
import pandas as pd
from fitbit_sleep.fitbit_sleep_base import base_fitbit_sleep_features
import itertools
# Script body: inputs and parameters come from the `snakemake` object, which is expected
# to be provided by the workflow that runs this file.
sleep_summary_data = pd.read_csv(snakemake.input["sleep_summary_data"])
requested_summary_features = snakemake.params["summary_features"]
requested_sleep_type = snakemake.params["sleep_types"]
day_segment = snakemake.params["day_segment"]
# Start from an empty frame keyed by local_date and outer-merge the computed features onto it.
sleep_features = pd.DataFrame(columns=["local_date"])
sleep_features = sleep_features.merge(base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type), on="local_date", how="outer")
# Features are only expected for the "daily" segment: one per (feature, sleep type) pair.
requested_features = ["".join(feature) for feature in itertools.product(requested_summary_features, requested_sleep_type)] if day_segment == "daily" else []
# Sanity check: the output must have one column per requested feature plus local_date.
assert len(requested_features) + 1 == sleep_features.shape[1], "The number of features in the output dataframe (=" + str(sleep_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your fitbit sleep feature extraction functions"
sleep_features.to_csv(snakemake.output[0], index=False)

View File

@ -0,0 +1,91 @@
import pandas as pd
import itertools
def extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features, sleep_type, sleep_summary_features):
    """Outer-join per-segment sleep summary aggregates for one sleep type onto a feature table.

    Args:
        sleep_summary_data: DataFrame of sleep episodes; the columns read here are
            "local_segment", "is_main_sleep", "efficiency", the five "minutes_*"
            columns and "timestamp".
        summary_features: container of "summary..."-prefixed feature names to add.
        sleep_type: "main" (is_main_sleep == 1), "nap" (is_main_sleep == 0) or "all".
        sleep_summary_features: DataFrame the new columns are joined onto (by index).

    Returns:
        A new DataFrame with one "sleep_rapids_<feature><sleep_type>" column per
        requested feature, grouped by "local_segment".

    Raises:
        ValueError: if sleep_type is not one of the three supported values.
    """
    if sleep_type not in ("main", "nap", "all"):
        raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].")
    if sleep_type != "all":
        main_flag = 1 if sleep_type == "main" else 0
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == main_flag]

    duration_columns = ["minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed"]

    # (aggregation, source columns, [(feature name, source column), ...]) in output order.
    aggregation_plan = (
        ("sum",
         duration_columns,
         [("summarysumdurationafterwakeup", "minutes_after_wakeup"),
          ("summarysumdurationasleep", "minutes_asleep"),
          ("summarysumdurationawake", "minutes_awake"),
          ("summarysumdurationtofallasleep", "minutes_to_fall_asleep"),
          ("summarysumdurationinbed", "minutes_in_bed")]),
        ("mean",
         ["efficiency"] + duration_columns,
         [("summaryavgefficiency", "efficiency"),
          ("summaryavgdurationafterwakeup", "minutes_after_wakeup"),
          ("summaryavgdurationasleep", "minutes_asleep"),
          ("summaryavgdurationawake", "minutes_awake"),
          ("summaryavgdurationtofallasleep", "minutes_to_fall_asleep"),
          ("summaryavgdurationinbed", "minutes_in_bed")]),
        ("count",
         ["timestamp"],
         [("summarycountepisode", "timestamp")]),
    )

    for aggregate, value_columns, outputs in aggregation_plan:
        grouped = sleep_summary_data[["local_segment"] + value_columns].groupby(["local_segment"])
        per_segment = getattr(grouped, aggregate)()
        for feature, column in outputs:
            if feature not in summary_features:
                continue
            joined = sleep_summary_features.join(per_segment[[column]], how="outer")
            sleep_summary_features = joined.rename(columns={column: "sleep_rapids_" + feature + sleep_type})

    return sleep_summary_features
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the RAPIDS Fitbit sleep summary features for one day segment.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is passed to pd.read_csv.
        day_segment: label of the day segment to compute features for.
        provider: provider settings; "FEATURES" and "SLEEP_TYPES" are read here.
        filter_data_by_segment: callable invoked as
            filter_data_by_segment(data, day_segment). Presumably it keeps the rows
            of that segment and supplies the "local_segment" column - confirm
            against the caller.
        *args, **kwargs: accepted and ignored.

    Returns:
        DataFrame with a "local_segment" column plus one
        "sleep_rapids_summary<feature><sleep_type>" column per supported
        (feature, sleep type) combination. It has no rows when there is no data
        for full-day (00:00:00 to 23:59:59) instances of the segment.
    """
    # Function-scope import so this block does not depend on the module's import list.
    import re

    sleep_summary_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_summary_features = ["summary" + x for x in provider["FEATURES"]]
    requested_sleep_types = provider["SLEEP_TYPES"]

    # name of the features this function can compute
    base_summary_features = ["summarycountepisode", "summaryavgefficiency", "summarysumdurationafterwakeup", "summarysumdurationasleep", "summarysumdurationawake", "summarysumdurationtofallasleep", "summarysumdurationinbed", "summaryavgdurationafterwakeup", "summaryavgdurationasleep", "summaryavgdurationawake", "summaryavgdurationtofallasleep", "summaryavgdurationinbed"]
    base_sleep_types = ["main", "nap", "all"]

    # The subset of requested names this function can compute. Filtering the base
    # lists (instead of intersecting sets) keeps the order, and therefore the
    # output column order, identical from one run to the next.
    summary_features_to_compute = [name for name in base_summary_features if name in requested_summary_features]
    sleep_types_to_compute = [name for name in base_sleep_types if name in requested_sleep_types]

    # full names
    features_fullnames_to_compute = ["".join(feature) for feature in itertools.product(summary_features_to_compute, sleep_types_to_compute)]
    # Every feature except the average efficiency is filled with 0 when missing.
    colnames_can_be_zero = ["sleep_rapids_" + col for col in features_fullnames_to_compute if "summaryavgefficiency" not in col]

    # Result returned whenever there is nothing to extract features from.
    empty_features = pd.DataFrame(columns=["local_segment"] + ["sleep_rapids_" + x for x in features_fullnames_to_compute])
    if sleep_summary_data.empty:
        return empty_features

    sleep_summary_data = filter_data_by_segment(sleep_summary_data, day_segment)
    if sleep_summary_data.empty:
        return empty_features

    # only keep the segments start at 00:00:00 and end at 23:59:59
    datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
    datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
    # The label is escaped so regex metacharacters in it are matched literally.
    segment_regex = "{}#{},{}".format(re.escape(str(day_segment)), datetime_start_regex, datetime_end_regex)
    sleep_summary_data = sleep_summary_data[sleep_summary_data["local_segment"].str.match(segment_regex)]
    if sleep_summary_data.empty:
        return empty_features

    sleep_summary_features = pd.DataFrame()
    for sleep_type in sleep_types_to_compute:
        sleep_summary_features = extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features_to_compute, sleep_type, sleep_summary_features)

    if colnames_can_be_zero:
        sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0)

    # The joins start from an index-less empty frame, so name the index explicitly
    # rather than relying on how pandas propagates index names through outer joins.
    sleep_summary_features = sleep_summary_features.rename_axis("local_segment").reset_index()

    return sleep_summary_features

View File

@ -1,66 +0,0 @@
import pandas as pd
import numpy as np
import time
from fitbit_step.fitbit_step_base import base_fitbit_step_features
def isInvalidTime(str_time):
    """Return True when str_time cannot be parsed with the '%H:%M' format, False otherwise."""
    try:
        time.strptime(str_time, '%H:%M')
    except ValueError:
        return True
    else:
        return False
def isInMainSleep(local_date_time, sleep):
    """Tell whether local_date_time lies inside at least one sleep episode.

    An episode contains the moment when its "local_start_date_time" is not later
    and its "local_end_date_time" is not earlier than local_date_time (both
    bounds inclusive).
    """
    already_started = sleep["local_start_date_time"] <= local_date_time
    not_yet_ended = local_date_time <= sleep["local_end_date_time"]
    containing_episodes = sleep[already_started & not_yet_ended]
    return len(containing_episodes) > 0
def getStepsOutsideFitbitMainSleep(sleep, steps):
    """Return the step rows whose timestamp falls outside every sleep episode.

    Args:
        sleep: DataFrame of sleep episodes, forwarded to isInMainSleep.
        steps: DataFrame of step records with a "local_date_time" column. As
            before, an "inMainSleep" flag column is added to this frame in place.

    Returns:
        The rows of steps whose "inMainSleep" flag is False (the flag column is
        part of the result).
    """
    # Map over the timestamp column only. A row-wise DataFrame.apply(axis=1) hands
    # back a DataFrame rather than a Series when `steps` has no rows, which cannot
    # be assigned to a single column.
    steps['inMainSleep'] = steps['local_date_time'].apply(lambda local_date_time: isInMainSleep(local_date_time, sleep))
    return steps[steps['inMainSleep'] == False]
def getStepsOutsideFixedMainSleep(sleepStart, sleepEnd, steps):
    """Return the step rows whose time of day is outside a fixed sleep window.

    Args:
        sleepStart: start of the sleep window, passed to DataFrame.between_time.
        sleepEnd: end of the sleep window, passed to DataFrame.between_time.
        steps: DataFrame of step records with a "local_date_time" column.

    Returns:
        A new DataFrame holding the rows outside the window, with
        "local_date_time" restored as a regular column and an "inMainSleep"
        flag column (False for every returned row).
    """
    by_time = steps.set_index('local_date_time')
    asleep_stamps = by_time.between_time(sleepStart, sleepEnd).index
    by_time['inMainSleep'] = False
    by_time.loc[asleep_stamps, 'inMainSleep'] = True
    flagged = by_time.reset_index(level=0)
    awake_rows = flagged['inMainSleep'] == False
    return flagged[awake_rows]
# Script body: inputs and parameters come from the `snakemake` object, which is expected
# to be provided by the workflow that runs this file.
step_data = pd.read_csv(snakemake.input["step_data"], parse_dates=["local_date_time", "local_date"])
day_segment = snakemake.params["day_segment"]
threshold_active_bout = snakemake.params["threshold_active_bout"]
include_zero_step_rows = snakemake.params["include_zero_step_rows"]
exclude_sleep = snakemake.params["exclude_sleep"]
exclude_sleep_type = snakemake.params["exclude_sleep_type"]
exclude_sleep_fixed_start = snakemake.params["exclude_sleep_fixed_start"]
exclude_sleep_fixed_end = snakemake.params["exclude_sleep_fixed_end"]
step_features = pd.DataFrame(columns=["local_date"])
# Requested feature names per group; bout features get a "sedentarybout"/"activebout" suffix.
requested_features = {}
requested_features["features_all_steps"] = snakemake.params["features_all_steps"]
requested_features["features_sedentary_bout"] = [feature + "sedentarybout" for feature in snakemake.params["features_sedentary_bout"]]
requested_features["features_active_bout"] = [feature + "activebout" for feature in snakemake.params["features_active_bout"]]
# Optionally drop the steps recorded during sleep, using either a fixed daily window
# (FIXED) or the sleep episodes read from the "sleep_data" input (FITBIT_BASED).
if exclude_sleep == True:
if exclude_sleep_type == "FIXED":
# Both window bounds must parse as HH:MM before they are used.
if isInvalidTime(exclude_sleep_fixed_start):
raise ValueError("Your fixed start time has an invalid format in your config.yml file")
if isInvalidTime(exclude_sleep_fixed_end):
raise ValueError("Your fixed end time has an invalid format in your config.yml file")
step_data = getStepsOutsideFixedMainSleep(exclude_sleep_fixed_start, exclude_sleep_fixed_end, step_data)
elif exclude_sleep_type == "FITBIT_BASED":
sleep_data = pd.read_csv(snakemake.input["sleep_data"], parse_dates=["local_start_date_time", "local_end_date_time"])
step_data = getStepsOutsideFitbitMainSleep(sleep_data, step_data)
else:
raise ValueError("We only support FIXED or FITBIT_BASED to filter step data based on sleep data. You typed " + exclude_sleep_type + ", Check your config.yaml file for typos")
step_features = step_features.merge(base_fitbit_step_features(step_data, day_segment, requested_features, threshold_active_bout, include_zero_step_rows), on="local_date", how="outer")
# Sanity check: the output must have one column per requested feature plus local_date.
assert np.sum([len(x) for x in requested_features.values()]) + 1 == step_features.shape[1], "The number of features in the output dataframe (=" + str(step_features.shape[1]) + ") does not match the expected value (=" + str(np.sum([len(x) for x in requested_features.values()])) + " + 1). Verify your fitbit step feature extraction functions"
step_features.to_csv(snakemake.output[0], index=False)