Update sleep summary features for segments

pull/103/head
Meng Li 2020-11-23 12:01:00 -05:00
parent 10384204a1
commit deba6b9e4f
9 changed files with 241 additions and 251 deletions

View File

@ -147,10 +147,6 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]: if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_CALORIES']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_CALORIES"]["TABLE_FORMAT"]) raise ValueError("config['FITBIT_CALORIES']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_CALORIES"]["TABLE_FORMAT"])
if config["FITBIT_SLEEP"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_SLEEP']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_SLEEP"]["TABLE_FORMAT"])
for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys(): for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys():
if config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_raw.csv", pid=config["PIDS"]))
@ -167,6 +163,20 @@ for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_intraday.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_intraday.csv", pid=config["PIDS"]))
for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"]))
# for provider in config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"].keys():
# if config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_raw.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_parsed.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys(): for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys():
if config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_summary_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_summary_raw.csv", pid=config["PIDS"]))
@ -189,13 +199,6 @@ for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
for provider in config["FITBIT_SLEEP"]["PROVIDERS"].keys():
if config["FITBIT_SLEEP"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_episodes.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
# visualization for data exploration # visualization for data exploration
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]: if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"])) files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))

View File

@ -300,18 +300,16 @@ FITBIT_STEPS_INTRADAY:
SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_intraday SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_intraday
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
FITBIT_SLEEP: FITBIT_SLEEP_SUMMARY:
TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES TABLE: sleep_summary
TABLE: SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
JSON: fitbit_sleep
CSV:
SUMMARY: sleep_summary
INTRADAY: sleep_intraday
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
FEATURES: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
SLEEP_TYPES: ["main", "nap", "all"] SLEEP_TYPES: ["main", "nap", "all"]
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"] SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_summary
SRC_LANGUAGE: "python"
FITBIT_CALORIES: FITBIT_CALORIES:
TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES

View File

@ -476,6 +476,32 @@ rule fitbit_steps_intraday_r_features:
script: script:
"../src/features/entry.R" "../src/features/entry.R"
rule fitbit_sleep_summary_python_features:
input:
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "fitbit_sleep_summary"
output:
"data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_python_{provider_key}.csv"
script:
"../src/features/entry.py"
rule fitbit_sleep_summary_r_features:
input:
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "fitbit_sleep_summary"
output:
"data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_r_{provider_key}.csv"
script:
"../src/features/entry.R"
# rule fitbit_sleep_features: # rule fitbit_sleep_features:
# input: # input:
# sleep_summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv", # sleep_summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",

View File

@ -206,6 +206,20 @@ rule fitbit_parse_steps:
script: script:
"../src/data/fitbit_parse_steps.py" "../src/data/fitbit_parse_steps.py"
rule fitbit_parse_sleep:
input:
"data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv"
params:
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
table = lambda wildcards: config["FITBIT_SLEEP_"+str(wildcards.fitbit_data_type).upper()]["TABLE"],
column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"],
fitbit_data_type = "{fitbit_data_type}",
sleep_episode_timestamp = config["FITBIT_SLEEP_SUMMARY"]["SLEEP_EPISODE_TIMESTAMP"]
output:
"data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed.csv"
script:
"../src/data/fitbit_parse_sleep.py"
rule fitbit_parse_calories: rule fitbit_parse_calories:
input: input:
data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])) data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
@ -219,19 +233,6 @@ rule fitbit_parse_calories:
script: script:
"../src/data/fitbit_parse_calories.py" "../src/data/fitbit_parse_calories.py"
rule fitbit_parse_sleep:
input:
data = expand("data/raw/{{pid}}/fitbit_sleep_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
params:
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
table = config["FITBIT_SLEEP"]["TABLE"],
table_format = config["FITBIT_SLEEP"]["TABLE_FORMAT"]
output:
summary_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_episodes.csv",
intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_parsed.csv"
script:
"../src/data/fitbit_parse_sleep.py"
rule fitbit_readable_datetime: rule fitbit_readable_datetime:
input: input:
sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv", sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv",

View File

@ -1,9 +1,8 @@
import json import json
import pandas as pd import pandas as pd
from datetime import datetime
import numpy as np import numpy as np
from datetime import datetime, timedelta
import dateutil.parser import dateutil.parser
from datetime import timedelta
SLEEP_CODE2LEVEL = ["asleep", "restless", "awake"] SLEEP_CODE2LEVEL = ["asleep", "restless", "awake"]
@ -12,7 +11,7 @@ SLEEP_SUMMARY_COLUMNS_V1_2 = ("device_id", "efficiency",
"minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed", "minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
"is_main_sleep", "type", "is_main_sleep", "type",
"local_start_date_time", "local_end_date_time", "local_start_date_time", "local_end_date_time",
"start_timestamp", "end_timestamp") "timestamp")
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless") SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless")
SLEEP_INTRADAY_COLUMNS = ("device_id", SLEEP_INTRADAY_COLUMNS = ("device_id",
@ -71,29 +70,30 @@ def classicData1min(data_summary):
newRow = {'dateTime':dateutil.parser.parse(origEntry['dateTime'])+timedelta(seconds=counter*timeDuration),'level':origEntry['level'],'seconds':timeDuration} newRow = {'dateTime':dateutil.parser.parse(origEntry['dateTime'])+timedelta(seconds=counter*timeDuration),'level':origEntry['level'],'seconds':timeDuration}
dataList.append(newRow) dataList.append(newRow)
counter = counter + 1 counter = counter + 1
# print(dataList)
return dataList return dataList
# Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday):
# Summary data # Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
sleep_record_type = "classic" sleep_record_type = "classic"
d_start_datetime = datetime.strptime(record["startTime"][:18], "%Y-%m-%dT%H:%M:%S") d_start_datetime = datetime.strptime(record["startTime"][:18], "%Y-%m-%dT%H:%M:%S")
d_end_datetime = datetime.strptime(record["endTime"][:18], "%Y-%m-%dT%H:%M:%S") d_end_datetime = datetime.strptime(record["endTime"][:18], "%Y-%m-%dT%H:%M:%S")
# Summary data
if fitbit_data_type == "summary":
row_summary = (device_id, record["efficiency"], row_summary = (device_id, record["efficiency"],
record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"], record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
d_is_main_sleep, sleep_record_type, d_is_main_sleep, sleep_record_type,
d_start_datetime, d_end_datetime, d_start_datetime, d_end_datetime,
d_start_datetime.date(), d_end_datetime.date(), 0,
0,0,
record["awakeCount"], record["awakeDuration"], record["awakeningsCount"], record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
record["restlessCount"], record["restlessDuration"]) record["restlessCount"], record["restlessDuration"])
records_summary.append(row_summary) records_summary.append(row_summary)
# Intraday data # Intraday data
if fitbit_data_type == "intraday":
start_date = d_start_datetime.date() start_date = d_start_datetime.date()
end_date = d_end_datetime.date() end_date = d_end_datetime.date()
is_before_midnight = True is_before_midnight = True
@ -119,14 +119,15 @@ def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, rec
return records_summary, records_intraday return records_summary, records_intraday
# Parse one record for sleep API version 1.2 # Parse one record for sleep API version 1.2
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday): def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
# Summary data
sleep_record_type = record['type'] sleep_record_type = record['type']
d_start_datetime = datetime.strptime(record["startTime"][:18], "%Y-%m-%dT%H:%M:%S") d_start_datetime = datetime.strptime(record["startTime"][:18], "%Y-%m-%dT%H:%M:%S")
d_end_datetime = datetime.strptime(record["endTime"][:18], "%Y-%m-%dT%H:%M:%S") d_end_datetime = datetime.strptime(record["endTime"][:18], "%Y-%m-%dT%H:%M:%S")
# Summary data
if fitbit_data_type == "summary":
row_summary = (device_id, record["efficiency"], row_summary = (device_id, record["efficiency"],
record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"], record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
d_is_main_sleep, sleep_record_type, d_is_main_sleep, sleep_record_type,
@ -134,8 +135,10 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
0,0) 0,0)
records_summary.append(row_summary) records_summary.append(row_summary)
if sleep_record_type == 'classic':
# Intraday data # Intraday data
if fitbit_data_type == "intraday":
if sleep_record_type == 'classic':
start_date = d_start_datetime.date() start_date = d_start_datetime.date()
end_date = d_end_datetime.date() end_date = d_end_datetime.date()
is_before_midnight = True is_before_midnight = True
@ -156,7 +159,7 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
d_datetime, 0) d_datetime, 0)
records_intraday.append(row_intraday) records_intraday.append(row_intraday)
else: else:
## for sleep type "stages" # For sleep type "stages"
start_date = d_start_datetime.date() start_date = d_start_datetime.date()
end_date = d_end_datetime.date() end_date = d_end_datetime.date()
is_before_midnight = True is_before_midnight = True
@ -182,7 +185,7 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
def parseSleepData(sleep_data): def parseSleepData(sleep_data, fitbit_data_type):
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2 SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
if sleep_data.empty: if sleep_data.empty:
return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS) return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)
@ -197,32 +200,54 @@ def parseSleepData(sleep_data):
# For sleep API version 1 # For sleep API version 1
if "awakeCount" in record: if "awakeCount" in record:
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1 SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1
records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday) records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
# For sleep API version 1.2 # For sleep API version 1.2
else: else:
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2 SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday) records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
if fitbit_data_type == "summary":
parsed_data = pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS)
elif fitbit_data_type == "intraday":
parsed_data = pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
else:
raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")
return parsed_data
return pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
table_format = snakemake.params["table_format"]
timezone = snakemake.params["timezone"] timezone = snakemake.params["timezone"]
column_format = snakemake.params["column_format"]
fitbit_data_type = snakemake.params["fitbit_data_type"]
sleep_episode_timestamp = snakemake.params["sleep_episode_timestamp"]
if table_format == "JSON": if column_format == "JSON":
json_raw = pd.read_csv(snakemake.input[0]) json_raw = pd.read_csv(snakemake.input[0])
summary, intraday = parseSleepData(json_raw) parsed_data = parseSleepData(json_raw, fitbit_data_type)
elif table_format == "CSV": elif column_format == "PLAIN_TEXT":
summary = pd.read_csv(snakemake.input[0], parse_dates=["local_start_date_time", "local_end_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None)) if fitbit_data_type == "summary":
intraday = pd.read_csv(snakemake.input[1], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None)) parsed_data = pd.read_csv(snakemake.input[0], parse_dates=["local_start_date_time", "local_end_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
elif fitbit_data_type == "intraday":
parsed_data = pd.read_csv(snakemake.input[1], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
else:
raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")
else:
raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].")
if summary.shape[0] > 0: if parsed_data.shape[0] > 0 and fitbit_data_type == "summary":
summary["start_timestamp"] = summary["local_start_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6 if sleep_episode_timestamp == "start":
summary["end_timestamp"] = summary["local_end_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6 parsed_data["timestamp"] = parsed_data["local_start_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
if intraday.shape[0] > 0: elif sleep_episode_timestamp == "end":
intraday["timestamp"] = intraday["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6 parsed_data["timestamp"] = parsed_data["local_end_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
else:
raise ValueError("SLEEP_EPISODE_TIMESTAMP can only be one of ['start', 'end'].")
# Drop useless columns: local_start_date_time and local_end_date_time
parsed_data.drop(["local_start_date_time", "local_end_date_time"], axis = 1, inplace=True)
# Unifying level if parsed_data.shape[0] > 0 and fitbit_data_type == "intraday":
intraday["unified_level"] = np.where(intraday["level"].isin(["awake", "wake", "restless"]), 0, 1) parsed_data["timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
# Unifying level
parsed_data["unified_level"] = np.where(parsed_data["level"].isin(["awake", "wake", "restless"]), 0, 1)
summary.to_csv(snakemake.output["summary_data"], index=False) parsed_data.to_csv(snakemake.output[0], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)

View File

@ -1,70 +0,0 @@
import pandas as pd
import itertools
def dailyFeaturesFromSummaryData(sleep_daily_features, sleep_summary_data, summary_features, sleep_type):
if sleep_type == "main":
sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 1]
elif sleep_type == "nap":
sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 0]
elif sleep_type == "all":
pass
else:
raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].")
features_sum = sleep_summary_data[["minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed", "local_end_date"]].groupby(["local_end_date"]).sum()
features_sum.index.rename("local_date", inplace=True)
if "sumdurationafterwakeup" in summary_features:
sleep_daily_features = sleep_daily_features.join(features_sum[["minutes_after_wakeup"]], how="outer").rename(columns={"minutes_after_wakeup": "sleep_daily_sumdurationafterwakeup" + sleep_type})
if "sumdurationasleep" in summary_features:
sleep_daily_features = sleep_daily_features.join(features_sum[["minutes_asleep"]], how="outer").rename(columns={"minutes_asleep": "sleep_daily_sumdurationasleep" + sleep_type})
if "sumdurationawake" in summary_features:
sleep_daily_features = sleep_daily_features.join(features_sum[["minutes_awake"]], how="outer").rename(columns={"minutes_awake": "sleep_daily_sumdurationawake" + sleep_type})
if "sumdurationtofallasleep" in summary_features:
sleep_daily_features = sleep_daily_features.join(features_sum[["minutes_to_fall_asleep"]], how="outer").rename(columns={"minutes_to_fall_asleep": "sleep_daily_sumdurationtofallasleep" + sleep_type})
if "sumdurationinbed" in summary_features:
sleep_daily_features = sleep_daily_features.join(features_sum[["minutes_in_bed"]], how="outer").rename(columns={"minutes_in_bed": "sleep_daily_sumdurationinbed" + sleep_type})
features_avg = sleep_summary_data[["efficiency", "local_end_date"]].groupby(["local_end_date"]).mean()
features_avg.index.rename("local_date", inplace=True)
if "avgefficiency" in summary_features:
sleep_daily_features = sleep_daily_features.join(features_avg[["efficiency"]], how="outer").rename(columns={"efficiency": "sleep_daily_avgefficiency" + sleep_type})
features_count = sleep_summary_data[["local_start_date_time", "local_end_date"]].groupby(["local_end_date"]).count()
features_count.index.rename("local_date", inplace=True)
if "countepisode" in summary_features:
sleep_daily_features = sleep_daily_features.join(features_count[["local_start_date_time"]], how="outer").rename(columns={"local_start_date_time": "sleep_daily_countepisode" + sleep_type})
return sleep_daily_features
def base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type):
if not day_segment == "daily":
return pd.DataFrame(columns=["local_date"])
else:
# name of the features this function can compute
base_summary_features_names = ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
base_sleep_type = ["main", "nap", "all"]
# the subset of requested features this function can compute
summary_features_to_compute = list(set(requested_summary_features) & set(base_summary_features_names))
sleep_type_to_compute = list(set(requested_sleep_type) & set(base_sleep_type))
# full names
features_fullnames_to_compute = ["".join(feature) for feature in itertools.product(summary_features_to_compute, sleep_type_to_compute)]
colnames_can_be_zero = ["sleep_daily_" + x for x in [col for col in features_fullnames_to_compute if "avgefficiency" not in col]]
if sleep_summary_data.empty:
sleep_summary_features = pd.DataFrame(columns=["local_date"] + ["sleep_daily_" + x for x in features_fullnames_to_compute])
else:
sleep_summary_features = pd.DataFrame()
for sleep_type in sleep_type_to_compute:
sleep_summary_features = dailyFeaturesFromSummaryData(sleep_summary_features, sleep_summary_data, summary_features_to_compute, sleep_type)
sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0)
sleep_summary_features = sleep_summary_features.reset_index()
return sleep_summary_features

View File

@ -1,18 +0,0 @@
import pandas as pd
from fitbit_sleep.fitbit_sleep_base import base_fitbit_sleep_features
import itertools
sleep_summary_data = pd.read_csv(snakemake.input["sleep_summary_data"])
requested_summary_features = snakemake.params["summary_features"]
requested_sleep_type = snakemake.params["sleep_types"]
day_segment = snakemake.params["day_segment"]
sleep_features = pd.DataFrame(columns=["local_date"])
sleep_features = sleep_features.merge(base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type), on="local_date", how="outer")
requested_features = ["".join(feature) for feature in itertools.product(requested_summary_features, requested_sleep_type)] if day_segment == "daily" else []
assert len(requested_features) + 1 == sleep_features.shape[1], "The number of features in the output dataframe (=" + str(sleep_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your fitbit sleep feature extraction functions"
sleep_features.to_csv(snakemake.output[0], index=False)

View File

@ -0,0 +1,91 @@
import pandas as pd
import itertools
def extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features, sleep_type, sleep_summary_features):
if sleep_type == "main":
sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 1]
elif sleep_type == "nap":
sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 0]
elif sleep_type == "all":
pass
else:
raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].")
features_sum = sleep_summary_data[["local_segment", "minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed"]].groupby(["local_segment"]).sum()
if "summarysumdurationafterwakeup" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_sum[["minutes_after_wakeup"]], how="outer").rename(columns={"minutes_after_wakeup": "sleep_rapids_summarysumdurationafterwakeup" + sleep_type})
if "summarysumdurationasleep" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_sum[["minutes_asleep"]], how="outer").rename(columns={"minutes_asleep": "sleep_rapids_summarysumdurationasleep" + sleep_type})
if "summarysumdurationawake" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_sum[["minutes_awake"]], how="outer").rename(columns={"minutes_awake": "sleep_rapids_summarysumdurationawake" + sleep_type})
if "summarysumdurationtofallasleep" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_sum[["minutes_to_fall_asleep"]], how="outer").rename(columns={"minutes_to_fall_asleep": "sleep_rapids_summarysumdurationtofallasleep" + sleep_type})
if "summarysumdurationinbed" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_sum[["minutes_in_bed"]], how="outer").rename(columns={"minutes_in_bed": "sleep_rapids_summarysumdurationinbed" + sleep_type})
features_avg = sleep_summary_data[["local_segment", "efficiency", "minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed"]].groupby(["local_segment"]).mean()
if "summaryavgefficiency" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_avg[["efficiency"]], how="outer").rename(columns={"efficiency": "sleep_rapids_summaryavgefficiency" + sleep_type})
if "summaryavgdurationafterwakeup" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_avg[["minutes_after_wakeup"]], how="outer").rename(columns={"minutes_after_wakeup": "sleep_rapids_summaryavgdurationafterwakeup" + sleep_type})
if "summaryavgdurationasleep" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_avg[["minutes_asleep"]], how="outer").rename(columns={"minutes_asleep": "sleep_rapids_summaryavgdurationasleep" + sleep_type})
if "summaryavgdurationawake" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_avg[["minutes_awake"]], how="outer").rename(columns={"minutes_awake": "sleep_rapids_summaryavgdurationawake" + sleep_type})
if "summaryavgdurationtofallasleep" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_avg[["minutes_to_fall_asleep"]], how="outer").rename(columns={"minutes_to_fall_asleep": "sleep_rapids_summaryavgdurationtofallasleep" + sleep_type})
if "summaryavgdurationinbed" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_avg[["minutes_in_bed"]], how="outer").rename(columns={"minutes_in_bed": "sleep_rapids_summaryavgdurationinbed" + sleep_type})
features_count = sleep_summary_data[["local_segment", "timestamp"]].groupby(["local_segment"]).count()
if "summarycountepisode" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_count[["timestamp"]], how="outer").rename(columns={"timestamp": "sleep_rapids_summarycountepisode" + sleep_type})
return sleep_summary_features
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
sleep_summary_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_summary_features = ["summary" + x for x in provider["FEATURES"]]
requested_sleep_types = provider["SLEEP_TYPES"]
# name of the features this function can compute
base_summary_features = ["summarycountepisode", "summaryavgefficiency", "summarysumdurationafterwakeup", "summarysumdurationasleep", "summarysumdurationawake", "summarysumdurationtofallasleep", "summarysumdurationinbed", "summaryavgdurationafterwakeup", "summaryavgdurationasleep", "summaryavgdurationawake", "summaryavgdurationtofallasleep", "summaryavgdurationinbed"]
base_sleep_types = ["main", "nap", "all"]
# the subset of requested features this function can compute
summary_features_to_compute = list(set(requested_summary_features) & set(base_summary_features))
sleep_types_to_compute = list(set(requested_sleep_types) & set(base_sleep_types))
# full names
features_fullnames_to_compute = ["".join(feature) for feature in itertools.product(summary_features_to_compute, sleep_types_to_compute)]
colnames_can_be_zero = ["sleep_rapids_" + x for x in [col for col in features_fullnames_to_compute if "summaryavgefficiency" not in col]]
# extract features from summary data
sleep_summary_features = pd.DataFrame(columns=["local_segment"] + ["sleep_rapids_" + x for x in features_fullnames_to_compute])
if not sleep_summary_data.empty:
sleep_summary_data = filter_data_by_segment(sleep_summary_data, day_segment)
if not sleep_summary_data.empty:
# only keep the segments start at 00:00:00 and end at 23:59:59
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
segment_regex = "{}#{},{}".format(day_segment, datetime_start_regex, datetime_end_regex)
sleep_summary_data = sleep_summary_data[sleep_summary_data["local_segment"].str.match(segment_regex)]
if not sleep_summary_data.empty:
sleep_summary_features = pd.DataFrame()
for sleep_type in sleep_types_to_compute:
sleep_summary_features = extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features_to_compute, sleep_type, sleep_summary_features)
sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0)
sleep_summary_features = sleep_summary_features.reset_index()
return sleep_summary_features

View File

@ -1,66 +0,0 @@
import pandas as pd
import numpy as np
import time
from fitbit_step.fitbit_step_base import base_fitbit_step_features
def isInvalidTime(str_time):
try:
time.strptime(str_time, '%H:%M')
return False
except ValueError:
return True
def isInMainSleep(local_date_time, sleep):
# sleep_period_container = sleep.query("local_start_date_time <= @local_date_time <= local_end_date_time")
sleep_period_container = sleep[(sleep["local_start_date_time"] <= local_date_time) & (local_date_time <= sleep["local_end_date_time"])]
if sleep_period_container.shape[0] >= 1:
return True
else:
return False
def getStepsOutsideFitbitMainSleep(sleep, steps):
steps['inMainSleep'] = steps.apply(lambda row : isInMainSleep(row['local_date_time'], sleep), axis = 1)
return steps[steps['inMainSleep'] == False]
def getStepsOutsideFixedMainSleep(sleepStart, sleepEnd, steps):
steps = steps.set_index('local_date_time')
steps['inMainSleep'] = False
steps.loc[steps.between_time(sleepStart, sleepEnd).index, 'inMainSleep'] = True
steps.reset_index(level=0, inplace=True)
return steps[steps['inMainSleep'] == False]
step_data = pd.read_csv(snakemake.input["step_data"], parse_dates=["local_date_time", "local_date"])
day_segment = snakemake.params["day_segment"]
threshold_active_bout = snakemake.params["threshold_active_bout"]
include_zero_step_rows = snakemake.params["include_zero_step_rows"]
exclude_sleep = snakemake.params["exclude_sleep"]
exclude_sleep_type = snakemake.params["exclude_sleep_type"]
exclude_sleep_fixed_start = snakemake.params["exclude_sleep_fixed_start"]
exclude_sleep_fixed_end = snakemake.params["exclude_sleep_fixed_end"]
step_features = pd.DataFrame(columns=["local_date"])
requested_features = {}
requested_features["features_all_steps"] = snakemake.params["features_all_steps"]
requested_features["features_sedentary_bout"] = [feature + "sedentarybout" for feature in snakemake.params["features_sedentary_bout"]]
requested_features["features_active_bout"] = [feature + "activebout" for feature in snakemake.params["features_active_bout"]]
if exclude_sleep == True:
if exclude_sleep_type == "FIXED":
if isInvalidTime(exclude_sleep_fixed_start):
raise ValueError("Your fixed start time has an invalid format in your config.yml file")
if isInvalidTime(exclude_sleep_fixed_end):
raise ValueError("Your fixed end time has an invalid format in your config.yml file")
step_data = getStepsOutsideFixedMainSleep(exclude_sleep_fixed_start, exclude_sleep_fixed_end, step_data)
elif exclude_sleep_type == "FITBIT_BASED":
sleep_data = pd.read_csv(snakemake.input["sleep_data"], parse_dates=["local_start_date_time", "local_end_date_time"])
step_data = getStepsOutsideFitbitMainSleep(sleep_data, step_data)
else:
raise ValueError("We only support FIXED or FITBIT_BASED to filter step data based on sleep data. You typed " + exclude_sleep_type + ", Check your config.yaml file for typos")
step_features = step_features.merge(base_fitbit_step_features(step_data, day_segment, requested_features, threshold_active_bout, include_zero_step_rows), on="local_date", how="outer")
assert np.sum([len(x) for x in requested_features.values()]) + 1 == step_features.shape[1], "The number of features in the output dataframe (=" + str(step_features.shape[1]) + ") does not match the expected value (=" + str(np.sum([len(x) for x in requested_features.values()])) + " + 1). Verify your fitbit step feature extraction functions"
step_features.to_csv(snakemake.output[0], index=False)