Update sleep summary features for segments
parent
10384204a1
commit
deba6b9e4f
25
Snakefile
25
Snakefile
|
@ -147,10 +147,6 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
|||
# Fail fast on an unsupported FITBIT_CALORIES table format before any target file is requested
if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
    # str() keeps the message buildable when YAML parsed the value as a non-string (e.g. a number or a boolean);
    # the trailing space separates the message from the offending value
    raise ValueError("config['FITBIT_CALORIES']['TABLE_FORMAT'] should be JSON or CSV but you typed " + str(config["FITBIT_CALORIES"]["TABLE_FORMAT"]))
|
||||
|
||||
if config["FITBIT_SLEEP"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
|
||||
raise ValueError("config['FITBIT_SLEEP']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_SLEEP"]["TABLE_FORMAT"])
|
||||
|
||||
|
||||
for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_raw.csv", pid=config["PIDS"]))
|
||||
|
@ -167,6 +163,20 @@ for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_intraday.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"]))
|
||||
|
||||
# for provider in config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"].keys():
|
||||
# if config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_raw.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_parsed.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_summary_raw.csv", pid=config["PIDS"]))
|
||||
|
@ -189,13 +199,6 @@ for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
||||
|
||||
for provider in config["FITBIT_SLEEP"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_SLEEP"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_episodes.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
|
||||
|
||||
# visualization for data exploration
|
||||
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
|
|
14
config.yaml
14
config.yaml
|
@ -300,18 +300,16 @@ FITBIT_STEPS_INTRADAY:
|
|||
SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_intraday
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
FITBIT_SLEEP:
|
||||
TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES
|
||||
TABLE:
|
||||
JSON: fitbit_sleep
|
||||
CSV:
|
||||
SUMMARY: sleep_summary
|
||||
INTRADAY: sleep_intraday
|
||||
FITBIT_SLEEP_SUMMARY:
|
||||
TABLE: sleep_summary
|
||||
SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
|
||||
SLEEP_TYPES: ["main", "nap", "all"]
|
||||
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_summary
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
FITBIT_CALORIES:
|
||||
TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES
|
||||
|
|
|
@ -476,6 +476,32 @@ rule fitbit_steps_intraday_r_features:
|
|||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule fitbit_sleep_summary_python_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "fitbit_sleep_summary"
|
||||
output:
|
||||
"data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule fitbit_sleep_summary_r_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "fitbit_sleep_summary"
|
||||
output:
|
||||
"data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
# rule fitbit_sleep_features:
|
||||
# input:
|
||||
# sleep_summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
||||
|
|
|
@ -206,6 +206,20 @@ rule fitbit_parse_steps:
|
|||
script:
|
||||
"../src/data/fitbit_parse_steps.py"
|
||||
|
||||
rule fitbit_parse_sleep:
|
||||
input:
|
||||
"data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv"
|
||||
params:
|
||||
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
||||
table = lambda wildcards: config["FITBIT_SLEEP_"+str(wildcards.fitbit_data_type).upper()]["TABLE"],
|
||||
column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"],
|
||||
fitbit_data_type = "{fitbit_data_type}",
|
||||
sleep_episode_timestamp = config["FITBIT_SLEEP_SUMMARY"]["SLEEP_EPISODE_TIMESTAMP"]
|
||||
output:
|
||||
"data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed.csv"
|
||||
script:
|
||||
"../src/data/fitbit_parse_sleep.py"
|
||||
|
||||
rule fitbit_parse_calories:
|
||||
input:
|
||||
data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
|
||||
|
@ -219,19 +233,6 @@ rule fitbit_parse_calories:
|
|||
script:
|
||||
"../src/data/fitbit_parse_calories.py"
|
||||
|
||||
rule fitbit_parse_sleep:
|
||||
input:
|
||||
data = expand("data/raw/{{pid}}/fitbit_sleep_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
|
||||
params:
|
||||
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
||||
table = config["FITBIT_SLEEP"]["TABLE"],
|
||||
table_format = config["FITBIT_SLEEP"]["TABLE_FORMAT"]
|
||||
output:
|
||||
summary_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_episodes.csv",
|
||||
intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_parsed.csv"
|
||||
script:
|
||||
"../src/data/fitbit_parse_sleep.py"
|
||||
|
||||
rule fitbit_readable_datetime:
|
||||
input:
|
||||
sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv",
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
import json
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
from datetime import datetime, timedelta
|
||||
import dateutil.parser
|
||||
from datetime import timedelta
|
||||
|
||||
SLEEP_CODE2LEVEL = ["asleep", "restless", "awake"]
|
||||
|
||||
|
@ -12,7 +11,7 @@ SLEEP_SUMMARY_COLUMNS_V1_2 = ("device_id", "efficiency",
|
|||
"minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
|
||||
"is_main_sleep", "type",
|
||||
"local_start_date_time", "local_end_date_time",
|
||||
"start_timestamp", "end_timestamp")
|
||||
"timestamp")
|
||||
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless")
|
||||
|
||||
SLEEP_INTRADAY_COLUMNS = ("device_id",
|
||||
|
@ -71,71 +70,75 @@ def classicData1min(data_summary):
|
|||
newRow = {'dateTime':dateutil.parser.parse(origEntry['dateTime'])+timedelta(seconds=counter*timeDuration),'level':origEntry['level'],'seconds':timeDuration}
|
||||
dataList.append(newRow)
|
||||
counter = counter + 1
|
||||
# print(dataList)
|
||||
return dataList
|
||||
# Parse one record for sleep API version 1
|
||||
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday):
|
||||
|
||||
# Summary data
|
||||
# Parse one record for sleep API version 1
|
||||
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
|
||||
|
||||
sleep_record_type = "classic"
|
||||
|
||||
d_start_datetime = datetime.strptime(record["startTime"][:18], "%Y-%m-%dT%H:%M:%S")
|
||||
d_end_datetime = datetime.strptime(record["endTime"][:18], "%Y-%m-%dT%H:%M:%S")
|
||||
|
||||
row_summary = (device_id, record["efficiency"],
|
||||
record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
|
||||
d_is_main_sleep, sleep_record_type,
|
||||
d_start_datetime, d_end_datetime,
|
||||
d_start_datetime.date(), d_end_datetime.date(),
|
||||
0,0,
|
||||
record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
|
||||
record["restlessCount"], record["restlessDuration"])
|
||||
|
||||
records_summary.append(row_summary)
|
||||
# Summary data
|
||||
if fitbit_data_type == "summary":
|
||||
row_summary = (device_id, record["efficiency"],
|
||||
record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
|
||||
d_is_main_sleep, sleep_record_type,
|
||||
d_start_datetime, d_end_datetime,
|
||||
0,
|
||||
record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
|
||||
record["restlessCount"], record["restlessDuration"])
|
||||
|
||||
records_summary.append(row_summary)
|
||||
|
||||
# Intraday data
|
||||
start_date = d_start_datetime.date()
|
||||
end_date = d_end_datetime.date()
|
||||
is_before_midnight = True
|
||||
curr_date = start_date
|
||||
for data in record["minuteData"]:
|
||||
# For overnight episodes, use end_date once we are over midnight
|
||||
d_time = datetime.strptime(data["dateTime"], '%H:%M:%S').time()
|
||||
if is_before_midnight and d_time.hour == 0:
|
||||
curr_date = end_date
|
||||
d_datetime = datetime.combine(curr_date, d_time)
|
||||
if fitbit_data_type == "intraday":
|
||||
start_date = d_start_datetime.date()
|
||||
end_date = d_end_datetime.date()
|
||||
is_before_midnight = True
|
||||
curr_date = start_date
|
||||
for data in record["minuteData"]:
|
||||
# For overnight episodes, use end_date once we are over midnight
|
||||
d_time = datetime.strptime(data["dateTime"], '%H:%M:%S').time()
|
||||
if is_before_midnight and d_time.hour == 0:
|
||||
curr_date = end_date
|
||||
d_datetime = datetime.combine(curr_date, d_time)
|
||||
|
||||
# API 1.2 stores original_level as strings, so we convert original_levels of API 1 to strings too
|
||||
# (1: "asleep", 2: "restless", 3: "awake")
|
||||
d_original_level = SLEEP_CODE2LEVEL[int(data["value"])-1]
|
||||
# API 1.2 stores original_level as strings, so we convert original_levels of API 1 to strings too
|
||||
# (1: "asleep", 2: "restless", 3: "awake")
|
||||
d_original_level = SLEEP_CODE2LEVEL[int(data["value"])-1]
|
||||
|
||||
|
||||
row_intraday = (device_id,
|
||||
d_original_level, -1, d_is_main_sleep, sleep_record_type,
|
||||
d_datetime, 0)
|
||||
row_intraday = (device_id,
|
||||
d_original_level, -1, d_is_main_sleep, sleep_record_type,
|
||||
d_datetime, 0)
|
||||
|
||||
records_intraday.append(row_intraday)
|
||||
records_intraday.append(row_intraday)
|
||||
|
||||
return records_summary, records_intraday
|
||||
|
||||
# Parse one record for sleep API version 1.2
|
||||
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday):
|
||||
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
|
||||
|
||||
# Summary data
|
||||
sleep_record_type = record['type']
|
||||
|
||||
d_start_datetime = datetime.strptime(record["startTime"][:18], "%Y-%m-%dT%H:%M:%S")
|
||||
d_end_datetime = datetime.strptime(record["endTime"][:18], "%Y-%m-%dT%H:%M:%S")
|
||||
|
||||
row_summary = (device_id, record["efficiency"],
|
||||
record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
|
||||
d_is_main_sleep, sleep_record_type,
|
||||
d_start_datetime, d_end_datetime,
|
||||
0,0)
|
||||
# Summary data
|
||||
if fitbit_data_type == "summary":
|
||||
row_summary = (device_id, record["efficiency"],
|
||||
record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
|
||||
d_is_main_sleep, sleep_record_type,
|
||||
d_start_datetime, d_end_datetime,
|
||||
0,0)
|
||||
|
||||
records_summary.append(row_summary)
|
||||
|
||||
records_summary.append(row_summary)
|
||||
if sleep_record_type == 'classic':
|
||||
# Intraday data
|
||||
# Intraday data
|
||||
if fitbit_data_type == "intraday":
|
||||
if sleep_record_type == 'classic':
|
||||
start_date = d_start_datetime.date()
|
||||
end_date = d_end_datetime.date()
|
||||
is_before_midnight = True
|
||||
|
@ -155,8 +158,8 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
|
|||
d_original_level, -1, d_is_main_sleep, sleep_record_type,
|
||||
d_datetime, 0)
|
||||
records_intraday.append(row_intraday)
|
||||
else:
|
||||
## for sleep type "stages"
|
||||
else:
|
||||
# For sleep type "stages"
|
||||
start_date = d_start_datetime.date()
|
||||
end_date = d_end_datetime.date()
|
||||
is_before_midnight = True
|
||||
|
@ -182,7 +185,7 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
|
|||
|
||||
|
||||
|
||||
def parseSleepData(sleep_data):
|
||||
def parseSleepData(sleep_data, fitbit_data_type):
|
||||
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
|
||||
if sleep_data.empty:
|
||||
return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)
|
||||
|
@ -197,32 +200,54 @@ def parseSleepData(sleep_data):
|
|||
# For sleep API version 1
|
||||
if "awakeCount" in record:
|
||||
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1
|
||||
records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday)
|
||||
records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
|
||||
# For sleep API version 1.2
|
||||
else:
|
||||
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
|
||||
records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday)
|
||||
records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
|
||||
|
||||
if fitbit_data_type == "summary":
|
||||
parsed_data = pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS)
|
||||
elif fitbit_data_type == "intraday":
|
||||
parsed_data = pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
|
||||
else:
|
||||
raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")
|
||||
|
||||
return parsed_data
|
||||
|
||||
|
||||
return pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
|
||||
|
||||
table_format = snakemake.params["table_format"]
|
||||
timezone = snakemake.params["timezone"]
|
||||
column_format = snakemake.params["column_format"]
|
||||
fitbit_data_type = snakemake.params["fitbit_data_type"]
|
||||
sleep_episode_timestamp = snakemake.params["sleep_episode_timestamp"]
|
||||
|
||||
if table_format == "JSON":
|
||||
if column_format == "JSON":
|
||||
json_raw = pd.read_csv(snakemake.input[0])
|
||||
summary, intraday = parseSleepData(json_raw)
|
||||
elif table_format == "CSV":
|
||||
summary = pd.read_csv(snakemake.input[0], parse_dates=["local_start_date_time", "local_end_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
|
||||
intraday = pd.read_csv(snakemake.input[1], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
|
||||
parsed_data = parseSleepData(json_raw, fitbit_data_type)
|
||||
elif column_format == "PLAIN_TEXT":
|
||||
if fitbit_data_type == "summary":
|
||||
parsed_data = pd.read_csv(snakemake.input[0], parse_dates=["local_start_date_time", "local_end_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
|
||||
elif fitbit_data_type == "intraday":
|
||||
# The fitbit_parse_sleep rule declares a single input file per data type, so the intraday table is input[0] as well
parsed_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
|
||||
else:
|
||||
raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")
|
||||
else:
|
||||
raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].")
|
||||
|
||||
if summary.shape[0] > 0:
|
||||
summary["start_timestamp"] = summary["local_start_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
|
||||
summary["end_timestamp"] = summary["local_end_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
|
||||
if intraday.shape[0] > 0:
|
||||
intraday["timestamp"] = intraday["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
|
||||
if parsed_data.shape[0] > 0 and fitbit_data_type == "summary":
|
||||
if sleep_episode_timestamp == "start":
|
||||
parsed_data["timestamp"] = parsed_data["local_start_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
|
||||
elif sleep_episode_timestamp == "end":
|
||||
parsed_data["timestamp"] = parsed_data["local_end_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
|
||||
else:
|
||||
raise ValueError("SLEEP_EPISODE_TIMESTAMP can only be one of ['start', 'end'].")
|
||||
# Drop useless columns: local_start_date_time and local_end_date_time
|
||||
parsed_data.drop(["local_start_date_time", "local_end_date_time"], axis = 1, inplace=True)
|
||||
|
||||
# Unifying level
|
||||
intraday["unified_level"] = np.where(intraday["level"].isin(["awake", "wake", "restless"]), 0, 1)
|
||||
if parsed_data.shape[0] > 0 and fitbit_data_type == "intraday":
|
||||
parsed_data["timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
|
||||
# Unifying level
|
||||
parsed_data["unified_level"] = np.where(parsed_data["level"].isin(["awake", "wake", "restless"]), 0, 1)
|
||||
|
||||
summary.to_csv(snakemake.output["summary_data"], index=False)
|
||||
intraday.to_csv(snakemake.output["intraday_data"], index=False)
|
||||
parsed_data.to_csv(snakemake.output[0], index=False)
|
|
@ -1,70 +0,0 @@
|
|||
import pandas as pd
|
||||
import itertools
|
||||
|
||||
|
||||
|
||||
def dailyFeaturesFromSummaryData(sleep_daily_features, sleep_summary_data, summary_features, sleep_type):
    """Join the requested daily sleep summary features for one sleep type onto a frame.

    Episodes are grouped by their "local_end_date" and the resulting index is
    named "local_date". Every computed column is named
    "sleep_daily_" + feature + sleep_type.

    Parameters:
        sleep_daily_features: DataFrame the new columns are outer-joined onto.
        sleep_summary_data: DataFrame of sleep episodes; must contain
            "local_end_date", "is_main_sleep" (for "main"/"nap") and the source
            column of every requested feature.
        summary_features: collection of feature names to compute.
        sleep_type: "main" (is_main_sleep == 1), "nap" (is_main_sleep == 0) or
            "all" (no filtering).

    Returns:
        The joined DataFrame.

    Raises:
        ValueError: if sleep_type is not one of "main", "nap", "all".
    """
    if sleep_type == "main":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 1]
    elif sleep_type == "nap":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 0]
    elif sleep_type != "all":
        raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].")

    # (feature name, source column, aggregation), listed in output column order
    feature_specs = (
        ("sumdurationafterwakeup", "minutes_after_wakeup", "sum"),
        ("sumdurationasleep", "minutes_asleep", "sum"),
        ("sumdurationawake", "minutes_awake", "sum"),
        ("sumdurationtofallasleep", "minutes_to_fall_asleep", "sum"),
        ("sumdurationinbed", "minutes_in_bed", "sum"),
        ("avgefficiency", "efficiency", "mean"),
        # episodes are counted through their start datetime column
        ("countepisode", "local_start_date_time", "count"),
    )

    episodes_by_day = sleep_summary_data.groupby(["local_end_date"])
    for feature, column, aggregation in feature_specs:
        if feature not in summary_features:
            continue
        aggregated = episodes_by_day[[column]].agg(aggregation)
        aggregated.index.rename("local_date", inplace=True)
        sleep_daily_features = sleep_daily_features.join(aggregated, how="outer").rename(columns={column: "sleep_daily_" + feature + sleep_type})

    return sleep_daily_features
|
||||
|
||||
def base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type):
    """Compute the base daily Fitbit sleep summary features.

    Parameters:
        sleep_summary_data: DataFrame of parsed sleep summary episodes.
        day_segment: only "daily" is supported; any other value yields a frame
            with just a "local_date" column.
        requested_summary_features: feature names requested by the caller.
        requested_sleep_type: sleep types requested by the caller.

    Returns:
        DataFrame with a "local_date" column plus one
        "sleep_daily_<feature><sleep_type>" column per supported combination.
        Columns follow the order of the request (the previous set intersection
        made the order depend on string hashing, i.e. vary between runs).
    """
    if not day_segment == "daily":
        return pd.DataFrame(columns=["local_date"])

    # name of the features this function can compute
    base_summary_features_names = ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
    base_sleep_type = ["main", "nap", "all"]

    # the subset of requested features this function can compute;
    # dict.fromkeys drops duplicates while keeping the requested order
    summary_features_to_compute = [feature for feature in dict.fromkeys(requested_summary_features) if feature in base_summary_features_names]
    sleep_type_to_compute = [sleep_type for sleep_type in dict.fromkeys(requested_sleep_type) if sleep_type in base_sleep_type]

    # full names
    features_fullnames_to_compute = ["".join(feature) for feature in itertools.product(summary_features_to_compute, sleep_type_to_compute)]

    # every feature except average efficiency is filled with 0 on days without a value
    colnames_can_be_zero = ["sleep_daily_" + name for name in features_fullnames_to_compute if "avgefficiency" not in name]

    if sleep_summary_data.empty:
        return pd.DataFrame(columns=["local_date"] + ["sleep_daily_" + name for name in features_fullnames_to_compute])

    sleep_summary_features = pd.DataFrame()
    for sleep_type in sleep_type_to_compute:
        sleep_summary_features = dailyFeaturesFromSummaryData(sleep_summary_features, sleep_summary_data, summary_features_to_compute, sleep_type)

    sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0)

    # turn the "local_date" index produced by the joins into a regular column
    sleep_summary_features = sleep_summary_features.reset_index()

    return sleep_summary_features
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from fitbit_sleep.fitbit_sleep_base import base_fitbit_sleep_features
|
||||
import itertools
|
||||
|
||||
# Inputs and parameters injected by Snakemake for this rule
sleep_summary_data = pd.read_csv(snakemake.input["sleep_summary_data"])
requested_summary_features = snakemake.params["summary_features"]
requested_sleep_type = snakemake.params["sleep_types"]
day_segment = snakemake.params["day_segment"]
sleep_features = pd.DataFrame(columns=["local_date"])

# Merge the base features onto the (initially empty) frame keyed by local_date
sleep_features = sleep_features.merge(base_fitbit_sleep_features(sleep_summary_data, day_segment, requested_summary_features, requested_sleep_type), on="local_date", how="outer")

# Only the "daily" segment produces feature columns
requested_features = ["".join(feature) for feature in itertools.product(requested_summary_features, requested_sleep_type)] if day_segment == "daily" else []

# Explicit check instead of `assert` so the validation still runs under `python -O`
if len(requested_features) + 1 != sleep_features.shape[1]:
    raise ValueError("The number of features in the output dataframe (=" + str(sleep_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your fitbit sleep feature extraction functions")

sleep_features.to_csv(snakemake.output[0], index=False)
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
import pandas as pd
|
||||
import itertools
|
||||
|
||||
def extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features, sleep_type, sleep_summary_features):
    """Join the requested per-segment sleep summary features for one sleep type onto a frame.

    Episodes are grouped by "local_segment". Every computed column is named
    "sleep_rapids_" + feature + sleep_type.

    Parameters:
        sleep_summary_data: DataFrame of sleep episodes; must contain
            "local_segment", "is_main_sleep" (for "main"/"nap") and the source
            column of every requested feature.
        summary_features: collection of feature names to compute (each name
            carries the "summary" prefix, e.g. "summarysumdurationasleep").
        sleep_type: "main" (is_main_sleep == 1), "nap" (is_main_sleep == 0) or
            "all" (no filtering).
        sleep_summary_features: DataFrame the new columns are outer-joined onto.

    Returns:
        The joined DataFrame.

    Raises:
        ValueError: if sleep_type is not one of "main", "nap", "all".
    """
    if sleep_type == "main":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 1]
    elif sleep_type == "nap":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 0]
    elif sleep_type != "all":
        raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].")

    # (feature name, source column, aggregation), listed in output column order
    feature_specs = (
        ("summarysumdurationafterwakeup", "minutes_after_wakeup", "sum"),
        ("summarysumdurationasleep", "minutes_asleep", "sum"),
        ("summarysumdurationawake", "minutes_awake", "sum"),
        ("summarysumdurationtofallasleep", "minutes_to_fall_asleep", "sum"),
        ("summarysumdurationinbed", "minutes_in_bed", "sum"),
        ("summaryavgefficiency", "efficiency", "mean"),
        ("summaryavgdurationafterwakeup", "minutes_after_wakeup", "mean"),
        ("summaryavgdurationasleep", "minutes_asleep", "mean"),
        ("summaryavgdurationawake", "minutes_awake", "mean"),
        ("summaryavgdurationtofallasleep", "minutes_to_fall_asleep", "mean"),
        ("summaryavgdurationinbed", "minutes_in_bed", "mean"),
        # episodes are counted through their timestamp column
        ("summarycountepisode", "timestamp", "count"),
    )

    episodes_by_segment = sleep_summary_data.groupby(["local_segment"])
    for feature, column, aggregation in feature_specs:
        if feature not in summary_features:
            continue
        aggregated = episodes_by_segment[[column]].agg(aggregation)
        sleep_summary_features = sleep_summary_features.join(aggregated, how="outer").rename(columns={column: "sleep_rapids_" + feature + sleep_type})

    return sleep_summary_features
|
||||
|
||||
|
||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the RAPIDS Fitbit sleep summary features for one day segment.

    Only segment instances that start at 00:00:00 and end at 23:59:59 are
    used; rows belonging to any other segment instance are dropped.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is the CSV
            (path or buffer) read with ``pd.read_csv``.
        day_segment: segment label; it is matched literally at the start of
            the ``local_segment`` column.
        provider: mapping with "FEATURES" (feature names without the
            "summary" prefix) and "SLEEP_TYPES".
        filter_data_by_segment: callable ``(data, day_segment)`` returning
            the rows that belong to the segment.
        *args, **kwargs: accepted and ignored.

    Returns:
        When no rows survive the filters, an empty DataFrame with a
        "local_segment" column plus one "sleep_rapids_<feature><sleep_type>"
        column per computable feature/sleep-type pair. Otherwise the frame
        accumulated by ``extractSleepFeaturesFromSummaryData`` with its index
        reset into a column.
    """
    import re  # local import: only needed to escape the segment label below

    sleep_summary_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_summary_features = ["summary" + x for x in provider["FEATURES"]]
    requested_sleep_types = provider["SLEEP_TYPES"]

    # name of the features this function can compute
    base_summary_features = ["summarycountepisode", "summaryavgefficiency", "summarysumdurationafterwakeup", "summarysumdurationasleep", "summarysumdurationawake", "summarysumdurationtofallasleep", "summarysumdurationinbed", "summaryavgdurationafterwakeup", "summaryavgdurationasleep", "summaryavgdurationawake", "summaryavgdurationtofallasleep", "summaryavgdurationinbed"]
    base_sleep_types = ["main", "nap", "all"]

    # the subset of requested features this function can compute; keep the
    # requested order (duplicates dropped) instead of intersecting sets, whose
    # iteration order depends on per-process string hashing and would make
    # the output column order vary between runs
    summary_features_to_compute = [x for x in dict.fromkeys(requested_summary_features) if x in base_summary_features]
    sleep_types_to_compute = [x for x in dict.fromkeys(requested_sleep_types) if x in base_sleep_types]

    # full names
    features_fullnames_to_compute = ["".join(feature) for feature in itertools.product(summary_features_to_compute, sleep_types_to_compute)]

    # every feature except the average efficiency is filled with 0 when missing
    colnames_can_be_zero = ["sleep_rapids_" + x for x in features_fullnames_to_compute if "summaryavgefficiency" not in x]

    # placeholder returned whenever there is nothing to extract
    sleep_summary_features = pd.DataFrame(columns=["local_segment"] + ["sleep_rapids_" + x for x in features_fullnames_to_compute])

    if not sleep_summary_data.empty:
        sleep_summary_data = filter_data_by_segment(sleep_summary_data, day_segment)

        if not sleep_summary_data.empty:
            # only keep the segments start at 00:00:00 and end at 23:59:59
            datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
            datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"

            # escape the label so regex metacharacters in it are matched literally
            segment_regex = "{}#{},{}".format(re.escape(day_segment), datetime_start_regex, datetime_end_regex)
            sleep_summary_data = sleep_summary_data[sleep_summary_data["local_segment"].str.match(segment_regex)]

            if not sleep_summary_data.empty:
                sleep_summary_features = pd.DataFrame()

                for sleep_type in sleep_types_to_compute:
                    sleep_summary_features = extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features_to_compute, sleep_type, sleep_summary_features)

                sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0)

                sleep_summary_features = sleep_summary_features.reset_index()

    return sleep_summary_features
|
|
@ -1,66 +0,0 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import time
|
||||
from fitbit_step.fitbit_step_base import base_fitbit_step_features
|
||||
|
||||
def isInvalidTime(str_time):
    """Return True when `str_time` cannot be parsed as an "HH:MM" time.

    Non-string values are reported as invalid as well: ``time.strptime``
    raises TypeError for them, which previously escaped this check instead
    of being treated as a badly formatted time.
    """
    try:
        time.strptime(str_time, '%H:%M')
    except (ValueError, TypeError):
        return True
    return False
|
||||
|
||||
def isInMainSleep(local_date_time, sleep):
    """Tell whether `local_date_time` lies inside at least one row of `sleep`.

    A row contains the moment when its "local_start_date_time" is not later
    than it and its "local_end_date_time" is not earlier than it (both ends
    inclusive).
    """
    started = sleep["local_start_date_time"] <= local_date_time
    not_finished = local_date_time <= sleep["local_end_date_time"]
    return bool((started & not_finished).any())
|
||||
|
||||
def getStepsOutsideFitbitMainSleep(sleep, steps):
    """Return the rows of `steps` whose "local_date_time" is outside every period in `sleep`.

    The returned frame carries an extra boolean "inMainSleep" column (False
    on every returned row). The `steps` frame passed in is left untouched.
    """
    # Work on a copy so the caller's frame does not gain an extra column.
    steps = steps.copy()
    # Build the flags explicitly rather than with a row-wise DataFrame.apply:
    # on a frame with no rows apply(axis=1) does not return a boolean Series,
    # so the column assignment and the filter below would not be reliable.
    steps['inMainSleep'] = pd.Series(
        [isInMainSleep(moment, sleep) for moment in steps['local_date_time']],
        index=steps.index,
        dtype=bool,
    )
    return steps[~steps['inMainSleep']]
|
||||
|
||||
|
||||
def getStepsOutsideFixedMainSleep(sleepStart, sleepEnd, steps):
    """Return the rows of `steps` whose time of day is outside a fixed sleep window.

    Rows whose "local_date_time" falls between `sleepStart` and `sleepEnd`
    (as selected by ``DataFrame.between_time``) are flagged in a new
    "inMainSleep" column and removed; the remaining rows are returned with
    "local_date_time" restored as a regular column.
    """
    by_time = steps.set_index('local_date_time')
    by_time['inMainSleep'] = False
    asleep_labels = by_time.between_time(sleepStart, sleepEnd).index
    by_time.loc[asleep_labels, 'inMainSleep'] = True
    flagged = by_time.reset_index(level=0)
    return flagged[~flagged['inMainSleep']]
|
||||
|
||||
# Script body: reads the step data and parameters provided by Snakemake,
# optionally drops the steps recorded during sleep, computes the requested
# step features and writes them to the first Snakemake output.
step_data = pd.read_csv(snakemake.input["step_data"], parse_dates=["local_date_time", "local_date"])

params = snakemake.params
day_segment = params["day_segment"]
threshold_active_bout = params["threshold_active_bout"]
include_zero_step_rows = params["include_zero_step_rows"]
exclude_sleep = params["exclude_sleep"]
exclude_sleep_type = params["exclude_sleep_type"]
exclude_sleep_fixed_start = params["exclude_sleep_fixed_start"]
exclude_sleep_fixed_end = params["exclude_sleep_fixed_end"]

# Bout features get a suffix so they can be told apart from the all-steps ones.
requested_features = {
    "features_all_steps": params["features_all_steps"],
    "features_sedentary_bout": [name + "sedentarybout" for name in params["features_sedentary_bout"]],
    "features_active_bout": [name + "activebout" for name in params["features_active_bout"]],
}

if exclude_sleep == True:
    if exclude_sleep_type not in ("FIXED", "FITBIT_BASED"):
        raise ValueError("We only support FIXED or FITBIT_BASED to filter step data based on sleep data. You typed " + exclude_sleep_type + ", Check your config.yaml file for typos")

    if exclude_sleep_type == "FIXED":
        # The fixed window must be two parseable times before it is applied.
        if isInvalidTime(exclude_sleep_fixed_start):
            raise ValueError("Your fixed start time has an invalid format in your config.yml file")
        if isInvalidTime(exclude_sleep_fixed_end):
            raise ValueError("Your fixed end time has an invalid format in your config.yml file")
        step_data = getStepsOutsideFixedMainSleep(exclude_sleep_fixed_start, exclude_sleep_fixed_end, step_data)
    else:
        # FITBIT_BASED: use the sleep periods stored in the sleep data file.
        sleep_data = pd.read_csv(snakemake.input["sleep_data"], parse_dates=["local_start_date_time", "local_end_date_time"])
        step_data = getStepsOutsideFitbitMainSleep(sleep_data, step_data)

computed_features = base_fitbit_step_features(step_data, day_segment, requested_features, threshold_active_bout, include_zero_step_rows)
step_features = pd.DataFrame(columns=["local_date"]).merge(computed_features, on="local_date", how="outer")

# Sanity check: one column per requested feature plus the "local_date" key.
requested_count = np.sum([len(names) for names in requested_features.values()])
assert requested_count + 1 == step_features.shape[1], "The number of features in the output dataframe (=" + str(step_features.shape[1]) + ") does not match the expected value (=" + str(requested_count) + " + 1). Verify your fitbit step feature extraction functions"

step_features.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue