2020-11-30 18:34:14 +01:00
|
|
|
import json, yaml
|
2020-05-15 23:51:00 +02:00
|
|
|
import pandas as pd
|
2020-05-27 20:29:28 +02:00
|
|
|
import numpy as np
|
2020-11-23 18:01:00 +01:00
|
|
|
from datetime import datetime, timedelta
|
2020-05-27 20:29:28 +02:00
|
|
|
import dateutil.parser
|
2020-05-15 23:51:00 +02:00
|
|
|
|
|
|
|
# Maps Fitbit sleep API v1 numeric codes (1-based) to their string levels:
# 1 -> "asleep", 2 -> "restless", 3 -> "awake".
SLEEP_CODE2LEVEL = ["asleep", "restless", "awake"]
# Column order of the sleep summary table built from Fitbit sleep API v1.2 records.
SLEEP_SUMMARY_COLUMNS_V1_2 = ("device_id", "efficiency",
                              "minutes_after_wakeup", "minutes_asleep", "minutes_awake",
                              "minutes_to_fall_asleep", "minutes_in_bed",
                              "is_main_sleep", "type",
                              "local_start_date_time", "local_end_date_time",
                              "timestamp")

# API v1 records additionally carry awake/restless counts and durations.
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings",
                                                         "count_restless", "duration_restless")
# Column order of the sleep intraday table (one row per resampled sleep epoch).
SLEEP_INTRADAY_COLUMNS = ("device_id",
                          # For "classic" type, original_level is one of {"awake", "restless", "asleep"}
                          # For "stages" type, original_level is one of {"wake", "deep", "light", "rem"}
                          "level",
                          # For "classic" type, unified_level is one of {0, 1} where 0: awake {"awake" + "restless"}, 1: asleep {"asleep"}
                          # For "stages" type, unified_level is one of {0, 1} where 0: awake {"wake"}, 1: asleep {"deep" + "light" + "rem"}
                          "unified_level",
                          # one of {0, 1} where 0: nap, 1: main sleep
                          "is_main_sleep",
                          # one of {"classic", "stages"}
                          "type",
                          "local_date_time",
                          "timestamp")
2020-05-27 20:29:28 +02:00
|
|
|
def mergeLongAndShortData(data_summary):
    """Resample a "stages" sleep record into fixed 30-second epochs.

    Splits every entry of data_summary["data"] (long data) and
    data_summary["shortData"] (short data) into consecutive 30-second rows,
    then overwrites the level of any long-data epoch whose start time also
    appears in the short data with "wake" (short data marks brief wake-ups).

    Parameters:
        data_summary: the "levels" object of a Fitbit API v1.2 sleep record;
            must contain "data" and "shortData", each a list of dicts with
            "dateTime" (ISO-8601 string), "level" (str) and "seconds" (int).

    Returns:
        A list of [dateTime, level, seconds] lists in long-data order, where
        dateTime is a (pandas) datetime and seconds is always 30.
    """
    window_length = 30  # seconds per resampled epoch

    def split_into_epochs(entries):
        # Expand each variable-length entry into seconds // window_length
        # consecutive 30-second rows; a remainder shorter than one full
        # window is dropped (same truncation as integer division).
        rows = []
        for entry in entries:
            start = dateutil.parser.parse(entry["dateTime"])
            for i in range(entry["seconds"] // window_length):
                rows.append({"dateTime": start + timedelta(seconds=i * window_length),
                             "level": entry["level"],
                             "seconds": window_length})
        return rows

    # Build each frame in one shot instead of the quadratic row-by-row
    # DataFrame.append (deprecated in pandas 1.4, removed in pandas 2.0).
    columns = ["dateTime", "level", "seconds"]
    long_data = pd.DataFrame(split_into_epochs(data_summary["data"]), columns=columns)
    short_data = pd.DataFrame(split_into_epochs(data_summary["shortData"]), columns=columns)

    long_data.set_index("dateTime", inplace=True)
    short_data.set_index("dateTime", inplace=True)

    # Epochs also present in the short data are brief awakenings: force "wake".
    long_data["level"] = np.where(long_data.index.isin(short_data.index), "wake", long_data["level"])

    long_data.reset_index(inplace=True)
    return long_data.values.tolist()
2020-06-03 22:40:42 +02:00
|
|
|
def classicData1min(data_summary):
    """Split the "classic" sleep entries of a v1.2 record into 60-second rows.

    Each entry of data_summary["data"] is expanded into seconds // 60
    consecutive rows, i.e. a trailing remainder shorter than one full
    minute contributes no row.

    Returns:
        A list of dicts with keys "dateTime" (datetime), "level" (str) and
        "seconds" (always 60), in input order.
    """
    one_minute = 60
    return [{"dateTime": dateutil.parser.parse(entry["dateTime"]) + timedelta(seconds=split_index * one_minute),
             "level": entry["level"],
             "seconds": one_minute}
            for entry in data_summary["data"]
            for split_index in range(entry["seconds"] // one_minute)]
2020-05-15 23:51:00 +02:00
|
|
|
# Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
    """Parse one sleep record of Fitbit sleep API v1.

    Depending on fitbit_data_type, appends one summary row (matching
    SLEEP_SUMMARY_COLUMNS_V1) to records_summary, or one row per entry of
    record["minuteData"] (matching SLEEP_INTRADAY_COLUMNS) to
    records_intraday. Any other fitbit_data_type value is a no-op.

    Parameters:
        record: one element of the "sleep" array of an API v1 JSON payload.
        device_id: identifier copied into every produced row.
        d_is_main_sleep: 1 for the main sleep episode, 0 for a nap.
        records_summary, records_intraday: accumulator lists, mutated in place.
        fitbit_data_type: "summary" or "intraday".

    Returns:
        The (records_summary, records_intraday) pair.
    """
    # API v1 only produces "classic" (awake/restless/asleep) records.
    sleep_record_type = "classic"

    # Keep the full "YYYY-MM-DDTHH:MM:SS" prefix (19 chars) and drop any
    # fractional part. The previous [:18] slice cut the seconds field to a
    # single digit, so e.g. ...:59 was parsed as second 5.
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")

    # Summary data
    if fitbit_data_type == "summary":
        row_summary = (device_id, record["efficiency"],
                       record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"],
                       record["minutesToFallAsleep"], record["timeInBed"],
                       d_is_main_sleep, sleep_record_type,
                       d_start_datetime, d_end_datetime,
                       0,  # "timestamp" placeholder; filled in later by the caller script
                       record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
                       record["restlessCount"], record["restlessDuration"])
        records_summary.append(row_summary)

    # Intraday data
    if fitbit_data_type == "intraday":
        end_date = d_end_datetime.date()
        curr_date = d_start_datetime.date()
        is_before_midnight = True
        for data in record["minuteData"]:
            # minuteData entries carry only a time of day; for overnight
            # episodes switch to end_date once the clock wraps past midnight.
            d_time = datetime.strptime(data["dateTime"], '%H:%M:%S').time()
            if is_before_midnight and d_time.hour == 0:
                curr_date = end_date
                # Flip the flag so the switch happens only once (it was never
                # cleared before, which would misdate a second hour-0 stretch).
                is_before_midnight = False
            d_datetime = datetime.combine(curr_date, d_time)

            # API 1.2 stores original_level as strings, so we convert original_levels of API 1 to strings too
            # (1: "asleep", 2: "restless", 3: "awake")
            d_original_level = SLEEP_CODE2LEVEL[int(data["value"]) - 1]

            row_intraday = (device_id,
                            d_original_level, -1, d_is_main_sleep, sleep_record_type,
                            d_datetime, 0)
            records_intraday.append(row_intraday)

    return records_summary, records_intraday
|
|
|
# Parse one record for sleep API version 1.2
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
    """Parse one sleep record of Fitbit sleep API v1.2.

    Depending on fitbit_data_type, appends one summary row (matching
    SLEEP_SUMMARY_COLUMNS_V1_2) to records_summary, or one row per resampled
    epoch (matching SLEEP_INTRADAY_COLUMNS) to records_intraday: "classic"
    records are split into 60-second epochs via classicData1min, "stages"
    records into 30-second epochs via mergeLongAndShortData. Any other
    fitbit_data_type value is a no-op.

    Parameters:
        record: one element of the "sleep" array of an API v1.2 JSON payload.
        device_id: identifier copied into every produced row.
        d_is_main_sleep: 1 for the main sleep episode, 0 for a nap.
        records_summary, records_intraday: accumulator lists, mutated in place.
        fitbit_data_type: "summary" or "intraday".

    Returns:
        The (records_summary, records_intraday) pair.
    """
    sleep_record_type = record['type']  # "classic" or "stages"

    # Keep the full "YYYY-MM-DDTHH:MM:SS" prefix (19 chars) and drop any
    # fractional part. The previous [:18] slice cut the seconds field to a
    # single digit, so e.g. ...:30 was parsed as second 3.
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")

    # Summary data
    if fitbit_data_type == "summary":
        row_summary = (device_id, record["efficiency"],
                       record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"],
                       record["minutesToFallAsleep"], record["timeInBed"],
                       d_is_main_sleep, sleep_record_type,
                       d_start_datetime, d_end_datetime,
                       0)  # "timestamp" placeholder; filled in later by the caller script
        records_summary.append(row_summary)

    # Intraday data
    if fitbit_data_type == "intraday":
        data_summary = record['levels']
        # Normalize both record types to a common (datetime, level) stream so
        # the previously duplicated classic/stages loops collapse into one.
        if sleep_record_type == 'classic':
            # Split the data into regular 60-second intervals.
            samples = ((d["dateTime"], d["level"]) for d in classicData1min(data_summary))
        else:
            # "stages": merge long and short data into 30-second intervals.
            samples = ((row[0], row[1]) for row in mergeLongAndShortData(data_summary))

        end_date = d_end_datetime.date()
        curr_date = d_start_datetime.date()
        is_before_midnight = True
        for sample_datetime, d_original_level in samples:
            # For overnight episodes, use end_date once we are over midnight;
            # flip the flag so the switch happens only once.
            d_time = sample_datetime.time()
            if is_before_midnight and d_time.hour == 0:
                curr_date = end_date
                is_before_midnight = False
            d_datetime = datetime.combine(curr_date, d_time)

            row_intraday = (device_id,
                            d_original_level, -1, d_is_main_sleep, sleep_record_type,
                            d_datetime, 0)
            records_intraday.append(row_intraday)

    return records_summary, records_intraday
2020-11-23 18:01:00 +01:00
|
|
|
def parseSleepData(sleep_data, fitbit_data_type):
    """Parse a dataframe of raw Fitbit sleep JSON payloads into one table.

    Parameters:
        sleep_data: dataframe with at least the columns "device_id" and
            "fitbit_data", where each fitbit_data cell is a JSON string
            whose "sleep" key holds a list of sleep records.
        fitbit_data_type: "summary" or "intraday".

    Returns:
        A dataframe with SLEEP_SUMMARY_COLUMNS_V1/_V1_2 columns (summary)
        or SLEEP_INTRADAY_COLUMNS columns (intraday), possibly empty.

    Raises:
        ValueError: for any other fitbit_data_type.
    """
    SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
    if sleep_data.empty:
        # Return a single empty frame of the requested kind. The previous
        # version returned a (summary, intraday) tuple here while the
        # non-empty path returned one dataframe, which broke callers that
        # immediately use parsed_data.shape on the result.
        if fitbit_data_type == "summary":
            return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS)
        if fitbit_data_type == "intraday":
            return pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)
        raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")

    device_id = sleep_data["device_id"].iloc[0]
    records_summary, records_intraday = [], []

    # Parse JSON into individual records
    for multi_record in sleep_data.fitbit_data:
        for record in json.loads(multi_record)["sleep"]:
            # Whether the sleep episode is nap (0) or main sleep (1)
            d_is_main_sleep = 1 if record["isMainSleep"] else 0

            # Records carrying "awakeCount" come from sleep API v1; all
            # others from v1.2.
            # NOTE(review): if one payload mixes v1 and v1.2 records, the
            # summary rows have different lengths and only the column set of
            # the LAST record's version is used -- confirm payloads are
            # single-version before relying on the summary schema.
            if "awakeCount" in record:
                SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1
                records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
            else:
                SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
                records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)

    if fitbit_data_type == "summary":
        parsed_data = pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS)
    elif fitbit_data_type == "intraday":
        parsed_data = pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
    else:
        raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")

    return parsed_data
|
2020-05-15 23:51:00 +02:00
|
|
|
|
2020-10-22 19:08:52 +02:00
|
|
|
|
2020-10-22 20:38:40 +02:00
|
|
|
# ---- Snakemake entry point: read raw sleep data, parse, filter, timestamp ----
timezone = snakemake.params["timezone"]
column_format = snakemake.params["column_format"]
fitbit_data_type = snakemake.params["fitbit_data_type"]
sleep_episode_timestamp = snakemake.params["sleep_episode_timestamp"]

# Window of interest: [START_DATE, END_DATE + 1 day) in the participant's local time.
with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
local_start_date = pd.Timestamp(participant_file["FITBIT"]["START_DATE"])
local_end_date = pd.Timestamp(participant_file["FITBIT"]["END_DATE"]) + pd.DateOffset(1)


def _parse_tz_naive(col):
    # Parse datetimes and drop any UTC offset so they compare against the
    # tz-naive range bounds above.
    return pd.to_datetime(col).tz_localize(None)


if column_format == "JSON":
    # Raw API payloads: parse the JSON column into a flat table.
    json_raw = pd.read_csv(snakemake.input["raw_data"])
    parsed_data = parseSleepData(json_raw, fitbit_data_type)
elif column_format == "PLAIN_TEXT":
    # Pre-flattened CSV: only the datetime columns need parsing.
    if fitbit_data_type == "summary":
        parsed_data = pd.read_csv(snakemake.input["raw_data"], parse_dates=["local_start_date_time", "local_end_date_time"], date_parser=_parse_tz_naive)
    elif fitbit_data_type == "intraday":
        parsed_data = pd.read_csv(snakemake.input["raw_data"], parse_dates=["local_date_time"], date_parser=_parse_tz_naive)
    else:
        raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")
else:
    raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].")

# NOTE(review): the two blocks below are skipped for empty frames, so an empty
# summary keeps its local_*_date_time columns and gains no "timestamp" column
# -- confirm downstream tolerates that schema difference.
if parsed_data.shape[0] > 0 and fitbit_data_type == "summary":
    if sleep_episode_timestamp not in ("start", "end"):
        raise ValueError("SLEEP_EPISODE_TIMESTAMP can only be one of ['start', 'end'].")

    # Column name to be considered as the event datetime of the episode.
    datetime_column = "local_" + sleep_episode_timestamp + "_date_time"
    # Only keep dates in the range of [local_start_date, local_end_date).
    in_range = (parsed_data[datetime_column] >= local_start_date) & (parsed_data[datetime_column] < local_end_date)
    parsed_data = parsed_data.loc[in_range]
    # Convert the chosen datetime to epoch milliseconds in the configured timezone.
    parsed_data["timestamp"] = parsed_data[datetime_column].dt.tz_localize(timezone).astype(np.int64) // 10**6
    # Drop the now-redundant start/end datetime columns.
    parsed_data.drop(["local_start_date_time", "local_end_date_time"], axis=1, inplace=True)

if parsed_data.shape[0] > 0 and fitbit_data_type == "intraday":
    # Only keep dates in the range of [local_start_date, local_end_date).
    in_range = (parsed_data["local_date_time"] >= local_start_date) & (parsed_data["local_date_time"] < local_end_date)
    parsed_data = parsed_data.loc[in_range]
    # Convert the epoch datetime to epoch milliseconds in the configured timezone.
    parsed_data["timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
    # Unify levels across record types: 0 = awake-ish, 1 = asleep-ish.
    parsed_data["unified_level"] = np.where(parsed_data["level"].isin(["awake", "wake", "restless"]), 0, 1)

parsed_data.to_csv(snakemake.output[0], index=False)