# rapids/src/data/fitbit_parse_sensors/fitbit_parse_sleep.py
#
# 224 lines
# 11 KiB
# Python

import json
import pandas as pd
from datetime import datetime
import numpy as np
import dateutil.parser
from datetime import timedelta
# Sleep API v1 reports intraday levels as integer codes (1, 2, 3);
# the label for code `c` lives at position `c - 1`.
SLEEP_CODE2LEVEL = ["asleep", "restless", "awake"]

# Columns of the summary table produced for sleep API v1.2 records.
SLEEP_SUMMARY_COLUMNS_V1_2 = (
    "device_id",
    "efficiency",
    "minutes_after_wakeup",
    "minutes_asleep",
    "minutes_awake",
    "minutes_to_fall_asleep",
    "minutes_in_bed",
    "is_main_sleep",
    "type",
    "local_start_date_time",
    "local_end_date_time",
    "local_start_date",
    "local_end_date",
    "local_start_day_segment",
    "local_end_day_segment",
)

# Sleep API v1 summaries use the v1.2 columns followed by five extra
# awake/restless aggregates.
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + (
    "count_awake",
    "duration_awake",
    "count_awakenings",
    "count_restless",
    "duration_restless",
)

# Columns of the intraday (per-epoch) table.
SLEEP_INTRADAY_COLUMNS = (
    "device_id",
    # "classic" records: one of {"awake", "restless", "asleep"}
    # "stages" records:  one of {"wake", "deep", "light", "rem"}
    "original_level",
    # Binary summary of original_level: 0 = awake, 1 = asleep
    #   "classic": 0 <- {"awake", "restless"}, 1 <- {"asleep"}
    #   "stages":  0 <- {"wake"},              1 <- {"deep", "light", "rem"}
    "unified_level",
    # 0 = nap, 1 = main sleep
    "is_main_sleep",
    # Either "classic" or "stages"
    "type",
    "local_date_time",
    "local_date",
    "local_month",
    "local_day",
    "local_day_of_week",
    "local_time",
    "local_hour",
    "local_minute",
    "local_day_segment",
)
def mergeLongAndShortData(data_summary):
    """Expand a "stages" levels payload into 30-second epochs with short wakes applied.

    Every entry of ``data_summary['data']`` is cut into consecutive 30-second
    windows starting at its ``dateTime``; a trailing remainder shorter than
    30 seconds is dropped. A window whose start timestamp coincides with the
    start of a window derived the same way from ``data_summary['shortData']``
    has its level replaced by ``'wake'``.

    Args:
        data_summary: Mapping with a ``'data'`` list and, optionally, a
            ``'shortData'`` list. Each entry provides ``'dateTime'`` (a string
            understood by ``dateutil.parser.parse``), ``'level'`` and an
            integer ``'seconds'``. A missing ``'shortData'`` key is treated
            as "no short wakes".

    Returns:
        A list of ``[timestamp, level, 30]`` lists in input order, where
        ``timestamp`` is a ``pandas.Timestamp``.
    """
    windowLength = 30

    def _split(entries):
        # Yield (window_start, level) for every full window of every entry.
        for entry in entries:
            for index in range(entry['seconds'] // windowLength):
                window_start = dateutil.parser.parse(entry['dateTime']) + timedelta(seconds=index * windowLength)
                yield window_start, entry['level']

    # Rows are collected in plain Python containers: DataFrame.append copied the
    # whole frame on every call and no longer exists in pandas >= 2.0.
    long_windows = list(_split(data_summary['data']))
    short_starts = {start for start, _ in _split(data_summary.get('shortData') or [])}

    return [
        [pd.Timestamp(start), 'wake' if start in short_starts else level, windowLength]
        for start, level in long_windows
    ]
def classicData1min(data_summary):
    """Break every entry of ``data_summary['data']`` into one-minute rows.

    An entry lasting ``seconds`` contributes ``seconds // 60`` rows; row ``i``
    starts ``i`` minutes after the entry's parsed ``dateTime`` and keeps the
    entry's ``level``. Any remainder shorter than a minute is discarded.

    Returns:
        A list of dicts with the keys ``'dateTime'`` (datetime), ``'level'``
        and ``'seconds'`` (always 60), in input order.
    """
    minute = 60
    rows = []
    for segment in data_summary['data']:
        whole_minutes = segment['seconds'] // minute
        rows.extend(
            {
                'dateTime': dateutil.parser.parse(segment['dateTime']) + timedelta(seconds=offset * minute),
                'level': segment['level'],
                'seconds': minute,
            }
            for offset in range(whole_minutes)
        )
    return rows
# Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH):
    """Append the summary row and the per-minute rows of one API v1 sleep record.

    Args:
        record: Decoded sleep entry. It must provide "startTime", "endTime",
            the summary counters read below, and a "minuteData" list whose
            items carry "dateTime" ("HH:MM:SS") and "value" (1, 2 or 3).
        device_id: Identifier copied into every emitted row.
        d_is_main_sleep: 1 for a main sleep episode, 0 for a nap.
        records_summary: List extended in place with one summary tuple.
        records_intraday: List extended in place with one tuple per minute.
        HOUR2EPOCH: Lookup indexed by the integer hour of day; its values
            fill the day-segment fields.

    Returns:
        The same ``records_summary`` and ``records_intraday`` objects.
    """
    # Summary data
    sleep_record_type = "classic"
    # "YYYY-MM-DDTHH:MM:SS" is 19 characters long; slicing one character short
    # would hand only the tens digit of the seconds to %S.
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")

    row_summary = (device_id, record["efficiency"],
                   record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"],
                   record["minutesToFallAsleep"], record["timeInBed"],
                   d_is_main_sleep, sleep_record_type,
                   d_start_datetime, d_end_datetime,
                   d_start_datetime.date(), d_end_datetime.date(),
                   HOUR2EPOCH[d_start_datetime.hour], HOUR2EPOCH[d_end_datetime.hour],
                   record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
                   record["restlessCount"], record["restlessDuration"])
    records_summary.append(row_summary)

    # Intraday data
    end_date = d_end_datetime.date()
    curr_date = d_start_datetime.date()
    for data in record["minuteData"]:
        # minuteData only carries a time of day. For overnight episodes, switch
        # to end_date as soon as a sample in hour 0 is seen and keep it afterwards.
        d_time = datetime.strptime(data["dateTime"], '%H:%M:%S').time()
        if d_time.hour == 0:
            curr_date = end_date
        d_datetime = datetime.combine(curr_date, d_time)

        # API 1.2 stores original_level as strings, so API 1 codes are mapped
        # to strings too (1: "asleep", 2: "restless", 3: "awake")
        d_original_level = SLEEP_CODE2LEVEL[int(data["value"]) - 1]
        # unified_level summarises original_level: 0 is awake, 1 is asleep
        # {"awake", "restless"} -> 0 and {"asleep"} -> 1
        d_unified_level = 0 if d_original_level in ("awake", "restless") else 1

        row_intraday = (device_id,
                        d_original_level, d_unified_level, d_is_main_sleep, sleep_record_type,
                        d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                        d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                        HOUR2EPOCH[d_datetime.hour])
        records_intraday.append(row_intraday)

    return records_summary, records_intraday
# Parse one record for sleep API version 1.2
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH):
    """Append the summary row and the intraday rows of one API v1.2 sleep record.

    "classic" records are expanded into one-minute rows via
    ``classicData1min``; every other type is handled as "stages" and expanded
    into 30-second rows via ``mergeLongAndShortData``.

    Args:
        record: Decoded sleep entry. It must provide "type", "startTime",
            "endTime", "levels" and the summary counters read below.
        device_id: Identifier copied into every emitted row.
        d_is_main_sleep: 1 for a main sleep episode, 0 for a nap.
        records_summary: List extended in place with one summary tuple.
        records_intraday: List extended in place with one tuple per epoch.
        HOUR2EPOCH: Lookup indexed by the integer hour of day; its values
            fill the day-segment fields.

    Returns:
        The same ``records_summary`` and ``records_intraday`` objects.
    """
    # Summary data
    sleep_record_type = record['type']
    # "YYYY-MM-DDTHH:MM:SS" is 19 characters long; slicing one character short
    # would hand only the tens digit of the seconds to %S.
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")

    row_summary = (device_id, record["efficiency"],
                   record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"],
                   record["minutesToFallAsleep"], record["timeInBed"],
                   d_is_main_sleep, sleep_record_type,
                   d_start_datetime, d_end_datetime,
                   d_start_datetime.date(), d_end_datetime.date(),
                   HOUR2EPOCH[d_start_datetime.hour], HOUR2EPOCH[d_end_datetime.hour])
    records_summary.append(row_summary)

    # Intraday data: normalise both record types to (timestamp, level) pairs
    data_summary = record['levels']
    is_classic = sleep_record_type == 'classic'
    if is_classic:
        # Split the data into regular 60-second intervals
        epochs = [(data["dateTime"], data["level"]) for data in classicData1min(data_summary)]
    else:
        # Sleep type "stages": 30-second intervals with short wakes merged in
        epochs = [(data[0], data[1]) for data in mergeLongAndShortData(data_summary)]

    end_date = d_end_datetime.date()
    curr_date = d_start_datetime.date()
    for epoch_datetime, d_original_level in epochs:
        # Only the time of day of each epoch is used. For overnight episodes,
        # switch to end_date as soon as an epoch in hour 0 is seen and keep it.
        d_time = epoch_datetime.time()
        if d_time.hour == 0:
            curr_date = end_date
        d_datetime = datetime.combine(curr_date, d_time)

        # unified_level: 0 is awake, 1 is asleep
        if is_classic:
            # Anything other than "awake"/"restless" counts as asleep
            d_unified_level = 0 if d_original_level in ("awake", "restless") else 1
        else:
            # Anything other than "deep"/"light"/"rem" counts as awake
            d_unified_level = 1 if d_original_level in ("deep", "light", "rem") else 0

        row_intraday = (device_id,
                        d_original_level, d_unified_level, d_is_main_sleep, sleep_record_type,
                        d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                        d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                        HOUR2EPOCH[d_datetime.hour])
        records_intraday.append(row_intraday)

    return records_summary, records_intraday
def parseSleepData(sleep_data, HOUR2EPOCH):
    """Parse raw Fitbit sleep payloads into a summary table and an intraday table.

    Args:
        sleep_data: DataFrame with a "device_id" column and a "fitbit_data"
            column; each "fitbit_data" value is a JSON string whose top-level
            "sleep" key holds a list of sleep records.
        HOUR2EPOCH: Lookup indexed by the integer hour of day, forwarded to
            the per-record parsers.

    Returns:
        A ``(summary, intraday)`` tuple of DataFrames. ``summary`` uses
        ``SLEEP_SUMMARY_COLUMNS_V1`` when at least one API v1 record was
        parsed and ``SLEEP_SUMMARY_COLUMNS_V1_2`` otherwise; ``intraday``
        uses ``SLEEP_INTRADAY_COLUMNS``.
    """
    if sleep_data.empty:
        return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS_V1_2), pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)

    device_id = sleep_data["device_id"].iloc[0]
    records_summary, records_intraday = [], []
    has_v1_records = False

    # Parse JSON into individual records
    for multi_record in sleep_data.fitbit_data:
        for record in json.loads(multi_record)["sleep"]:
            # Whether the sleep episode is nap (0) or main sleep (1)
            d_is_main_sleep = 1 if record["isMainSleep"] else 0

            if "awakeCount" in record:
                # For sleep API version 1
                has_v1_records = True
                records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH)
            else:
                # For sleep API version 1.2
                records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH)

    # Pick the header from everything that was parsed, not from whichever record
    # happened to come last: v1 rows are longer than the v1.2 header allows.
    if has_v1_records:
        summary_columns = SLEEP_SUMMARY_COLUMNS_V1
        width = len(summary_columns)
        # v1.2 rows lack the trailing v1-only fields; pad them so every row
        # matches the header length.
        records_summary = [tuple(row) + (None,) * (width - len(row)) for row in records_summary]
    else:
        summary_columns = SLEEP_SUMMARY_COLUMNS_V1_2

    return pd.DataFrame(data=records_summary, columns=summary_columns), pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)