Parse Fitbit summary and intraday data; Extract Fitbit daily features from summary data
parent
d07bb9ed5f
commit
915bdd04b1
|
@ -59,15 +59,19 @@ rule all:
|
||||||
expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",
|
expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",
|
||||||
pid = config["PIDS"],
|
pid = config["PIDS"],
|
||||||
day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]),
|
day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]),
|
||||||
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
|
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_{fitbit_data_type}_with_datetime.csv",
|
||||||
pid=config["PIDS"],
|
pid=config["PIDS"],
|
||||||
fitbit_sensor=config["FITBIT_SENSORS"]),
|
fitbit_sensor=config["FITBIT_SENSORS"],
|
||||||
|
fitbit_data_type=config["FITBIT_DATA_TYPE"]),
|
||||||
expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",
|
expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",
|
||||||
pid = config["PIDS"],
|
pid = config["PIDS"],
|
||||||
day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]),
|
day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]),
|
||||||
expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
|
expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
|
||||||
pid = config["PIDS"],
|
pid = config["PIDS"],
|
||||||
day_segment = config["STEP"]["DAY_SEGMENTS"]),
|
day_segment = config["STEP"]["DAY_SEGMENTS"]),
|
||||||
|
expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv",
|
||||||
|
pid = config["PIDS"],
|
||||||
|
day_segment = config["SLEEP"]["DAY_SEGMENTS"]),
|
||||||
expand("data/processed/{pid}/wifi_{segment}.csv",
|
expand("data/processed/{pid}/wifi_{segment}.csv",
|
||||||
pid=config["PIDS"],
|
pid=config["PIDS"],
|
||||||
segment = config["WIFI"]["DAY_SEGMENTS"]),
|
segment = config["WIFI"]["DAY_SEGMENTS"]),
|
||||||
|
|
|
@ -3,6 +3,7 @@ SENSORS: [applications_crashes, applications_foreground, applications_notificati
|
||||||
|
|
||||||
FITBIT_TABLE: [fitbit_data]
|
FITBIT_TABLE: [fitbit_data]
|
||||||
FITBIT_SENSORS: [heartrate, steps, sleep, calories]
|
FITBIT_SENSORS: [heartrate, steps, sleep, calories]
|
||||||
|
FITBIT_DATA_TYPE: [summary, intraday]
|
||||||
|
|
||||||
# Participants to include in the analysis
|
# Participants to include in the analysis
|
||||||
# You must create a file for each participant
|
# You must create a file for each participant
|
||||||
|
@ -114,6 +115,7 @@ APPLICATIONS_FOREGROUND:
|
||||||
HEARTRATE:
|
HEARTRATE:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]
|
FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]
|
||||||
|
DAILY_FEATURES_FROM_SUMMARY_DATA: ["restinghr"] # calories related features might be inaccurate: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
|
||||||
|
|
||||||
STEP:
|
STEP:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
|
@ -124,6 +126,11 @@ STEP:
|
||||||
THRESHOLD_ACTIVE_BOUT: 10 # steps
|
THRESHOLD_ACTIVE_BOUT: 10 # steps
|
||||||
INCLUDE_ZERO_STEP_ROWS: True
|
INCLUDE_ZERO_STEP_ROWS: True
|
||||||
|
|
||||||
|
SLEEP:
|
||||||
|
DAY_SEGMENTS: *day_segments
|
||||||
|
SLEEP_TYPES: ["main", "nap", "all"]
|
||||||
|
DAILY_FEATURES_FROM_SUMMARY_DATA: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
|
||||||
|
|
||||||
WIFI:
|
WIFI:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
|
||||||
|
@ -133,7 +140,7 @@ PARAMS_FOR_ANALYSIS:
|
||||||
SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]
|
SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
PHONE_FEATURES: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, activity_recognition, light, location_barnett, screen, sms_received, sms_sent]
|
PHONE_FEATURES: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, activity_recognition, light, location_barnett, screen, sms_received, sms_sent]
|
||||||
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step]
|
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
|
||||||
PHONE_FITBIT_FEATURES: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile
|
PHONE_FITBIT_FEATURES: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile
|
||||||
DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays]
|
DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays]
|
||||||
CATEGORICAL_DEMOGRAPHIC_FEATURES: ["gender"]
|
CATEGORICAL_DEMOGRAPHIC_FEATURES: ["gender"]
|
||||||
|
|
|
@ -167,12 +167,25 @@ rule applications_foreground_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/applications_foreground_features.py"
|
"../src/features/applications_foreground_features.py"
|
||||||
|
|
||||||
|
rule wifi_features:
|
||||||
|
input:
|
||||||
|
"data/raw/{pid}/wifi_with_datetime.csv"
|
||||||
|
params:
|
||||||
|
day_segment = "{day_segment}",
|
||||||
|
features = config["WIFI"]["FEATURES"]
|
||||||
|
output:
|
||||||
|
"data/processed/{pid}/wifi_{day_segment}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/wifi_features.R"
|
||||||
|
|
||||||
rule fitbit_heartrate_features:
|
rule fitbit_heartrate_features:
|
||||||
input:
|
input:
|
||||||
"data/raw/{pid}/fitbit_heartrate_with_datetime.csv",
|
heartrate_summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
|
||||||
|
heartrate_intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv"
|
||||||
params:
|
params:
|
||||||
day_segment = "{day_segment}",
|
day_segment = "{day_segment}",
|
||||||
features = config["HEARTRATE"]["FEATURES"],
|
features = config["HEARTRATE"]["FEATURES"],
|
||||||
|
daily_features_from_summary_data = config["HEARTRATE"]["DAILY_FEATURES_FROM_SUMMARY_DATA"]
|
||||||
output:
|
output:
|
||||||
"data/processed/{pid}/fitbit_heartrate_{day_segment}.csv"
|
"data/processed/{pid}/fitbit_heartrate_{day_segment}.csv"
|
||||||
script:
|
script:
|
||||||
|
@ -193,13 +206,15 @@ rule fitbit_step_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/fitbit_step_features.py"
|
"../src/features/fitbit_step_features.py"
|
||||||
|
|
||||||
rule wifi_features:
|
rule fitbit_sleep_features:
|
||||||
input:
|
input:
|
||||||
"data/raw/{pid}/wifi_with_datetime.csv"
|
sleep_summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
||||||
|
sleep_intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
|
||||||
params:
|
params:
|
||||||
day_segment = "{day_segment}",
|
day_segment = "{day_segment}",
|
||||||
features = config["WIFI"]["FEATURES"]
|
sleep_types = config["SLEEP"]["SLEEP_TYPES"],
|
||||||
|
daily_features_from_summary_data = config["SLEEP"]["DAILY_FEATURES_FROM_SUMMARY_DATA"]
|
||||||
output:
|
output:
|
||||||
"data/processed/{pid}/wifi_{day_segment}.csv"
|
"data/processed/{pid}/fitbit_sleep_{day_segment}.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/wifi_features.R"
|
"../src/features/fitbit_sleep_features.py"
|
||||||
|
|
|
@ -99,7 +99,8 @@ rule fitbit_with_datetime:
|
||||||
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
||||||
fitbit_sensor = "{fitbit_sensor}"
|
fitbit_sensor = "{fitbit_sensor}"
|
||||||
output:
|
output:
|
||||||
"data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv"
|
summary_data = "data/raw/{pid}/fitbit_{fitbit_sensor}_summary_with_datetime.csv",
|
||||||
|
intraday_data = "data/raw/{pid}/fitbit_{fitbit_sensor}_intraday_with_datetime.csv"
|
||||||
script:
|
script:
|
||||||
"../src/data/fitbit_readable_datetime.py"
|
"../src/data/fitbit_readable_datetime.py"
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
import json
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
# Column layout of the intraday calories table built by parseCaloriesData.
CALORIES_INTRADAY_COLUMNS = (
    "device_id",
    "level", "mets", "value",
    "local_date_time", "local_date", "local_month", "local_day",
    "local_day_of_week", "local_time", "local_hour", "local_minute",
    "local_day_segment",
)


def parseCaloriesData(calories_data, HOUR2EPOCH):
    """Flatten raw Fitbit calories JSON into one row per intraday sample.

    Args:
        calories_data: DataFrame with a ``device_id`` column and a
            ``fitbit_data`` column holding one JSON document (text) per row.
        HOUR2EPOCH: sequence indexed by hour of day; the looked-up value is
            stored in ``local_day_segment``.

    Returns:
        A ``(summary, intraday)`` pair of DataFrames. The summary frame is
        always empty in this parser; the intraday frame uses
        ``CALORIES_INTRADAY_COLUMNS``.
    """
    if calories_data.empty:
        return pd.DataFrame(), pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS)

    # Every row is tagged with the device id found on the first input row.
    device_id = calories_data["device_id"].iloc[0]

    rows = []
    for raw_json in calories_data.fitbit_data:
        parsed = json.loads(raw_json)
        day = datetime.strptime(
            parsed["activities-calories"][0]["dateTime"], "%Y-%m-%d")
        for sample in parsed["activities-calories-intraday"]["dataset"]:
            # Samples only carry a time of day; attach the record's date.
            time_of_day = datetime.strptime(sample["time"], '%H:%M:%S').time()
            stamp = datetime.combine(day, time_of_day)
            rows.append((
                device_id,
                sample["level"], sample["mets"], sample["value"],
                stamp, stamp.date(), stamp.month, stamp.day,
                stamp.weekday(), stamp.time(), stamp.hour, stamp.minute,
                HOUR2EPOCH[stamp.hour],
            ))

    return pd.DataFrame(), pd.DataFrame(data=rows, columns=CALORIES_INTRADAY_COLUMNS)
|
|
@ -0,0 +1,114 @@
|
||||||
|
import json
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
# Columns of the daily (summary) heart rate table: one row per parsed record.
HR_SUMMARY_COLUMNS = (
    "device_id",
    "local_date",
    "heartrate_daily_restinghr",
    "heartrate_daily_caloriesoutofrange",
    "heartrate_daily_caloriesfatburn",
    "heartrate_daily_caloriescardio",
    "heartrate_daily_caloriespeak",
)

# Columns of the intraday heart rate table: one row per sample in a record's
# intraday dataset.
HR_INTRADAY_COLUMNS = (
    "device_id",
    "heartrate", "heartrate_zone",
    "local_date_time", "local_date", "local_month", "local_day",
    "local_day_of_week", "local_time", "local_hour", "local_minute",
    "local_day_segment",
)
|
||||||
|
|
||||||
|
def parseHeartrateZones(heartrate_data):
    """Read the heart rate zone boundaries from the first raw record.

    Zones are outofrange, fatburn, cardio and peak; see
    https://help.fitbit.com/articles/en_US/Help_article/1565

    Args:
        heartrate_data: DataFrame whose ``fitbit_data`` column holds JSON
            text; only the first row is inspected.

    Returns:
        dict mapping the lower-cased zone name with spaces removed to a
        ``[min, max]`` list.

    Raises:
        ValueError: if the zones are in neither of the two known layouts.
    """
    first_day = json.loads(heartrate_data["fitbit_data"].iloc[0])["activities-heart"][0]

    if "heartRateZones" in first_day:
        # Layout of one API version (exact version unknown): zones at top level.
        zones = first_day["heartRateZones"]
    elif "value" in first_day:
        # Layout of another API version (exact version unknown): zones under "value".
        zones = first_day["value"]["heartRateZones"]
    else:
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    ranges_by_zone = {}
    for zone in zones:
        bounds = [zone["min"], zone["max"]]
        zone_key = zone["name"].lower().replace(" ", "")
        ranges_by_zone[zone_key] = bounds
    return ranges_by_zone
|
||||||
|
|
||||||
|
def parseHeartrateSummaryData(record_summary, device_id, curr_date):
    """Build the daily summary row for one heart rate record.

    Args:
        record_summary: dict taken from the record's ``activities-heart``
            list; two layouts are handled (zones at top level, or nested
            under ``value``).
        device_id: value stored in the first position of the row.
        curr_date: value stored in the second position of the row.

    Returns:
        tuple ``(device_id, curr_date, resting_hr, calories_outofrange,
        calories_fatburn, calories_cardio, calories_peak)``. The resting
        heart rate and the calories are ``None`` when the record does not
        carry them.

    Raises:
        ValueError: if the record matches neither known layout.
    """
    if "heartRateZones" in record_summary:
        # Layout of one API version (exact version unknown): zones at top
        # level and the resting heart rate, if any, directly under "value".
        heartrate_zones = record_summary["heartRateZones"]
        d_resting_heartrate = record_summary.get("value")
    elif "value" in record_summary:
        # Layout of another API version (exact version unknown): both the
        # zones and the resting heart rate are nested under "value".
        heartrate_zones = record_summary["value"]["heartRateZones"]
        d_resting_heartrate = record_summary["value"].get("restingHeartRate")
    else:
        # The exception must actually be raised; otherwise execution falls
        # through and fails later on the unbound heartrate_zones variable.
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    # Zones are read by position: outofrange, fatburn, cardio, peak.
    if "caloriesOut" in heartrate_zones[0]:
        d_calories_outofrange = heartrate_zones[0]["caloriesOut"]
        d_calories_fatburn = heartrate_zones[1]["caloriesOut"]
        d_calories_cardio = heartrate_zones[2]["caloriesOut"]
        d_calories_peak = heartrate_zones[3]["caloriesOut"]
    else:
        d_calories_outofrange = d_calories_fatburn = d_calories_cardio = d_calories_peak = None

    return (device_id,
            curr_date,
            d_resting_heartrate,
            d_calories_outofrange,
            d_calories_fatburn,
            d_calories_cardio,
            d_calories_peak)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range, HOUR2EPOCH):
    """Append one intraday row per heart rate sample to ``records_intraday``.

    Args:
        records_intraday: list that receives the new row tuples (mutated in
            place and also returned).
        dataset: iterable of dicts with a ``time`` ("HH:MM:SS") and a
            ``value`` entry.
        device_id: value stored in the first position of each row.
        curr_date: date (or datetime) the sample times are combined with.
        heartrate_zones_range: dict mapping zone name to ``[min, max]``.
        HOUR2EPOCH: sequence indexed by hour of day; the looked-up value is
            the last element of each row.

    Returns:
        The same ``records_intraday`` list.
    """
    for sample in dataset:
        time_of_day = datetime.strptime(sample["time"], '%H:%M:%S').time()
        stamp = datetime.combine(curr_date, time_of_day)
        bpm = sample["value"]

        # First zone whose range satisfies min <= heartrate < max, else None.
        zone_name = next(
            (name for name, bounds in heartrate_zones_range.items()
             if bpm >= bounds[0] and bpm < bounds[1]),
            None)

        records_intraday.append((
            device_id,
            bpm, zone_name,
            stamp, stamp.date(), stamp.month, stamp.day,
            stamp.weekday(), stamp.time(), stamp.hour, stamp.minute,
            HOUR2EPOCH[stamp.hour],
        ))
    return records_intraday
|
||||||
|
|
||||||
|
|
||||||
|
def parseHeartrateData(heartrate_data, HOUR2EPOCH):
    """Parse raw Fitbit heart rate records into summary and intraday tables.

    Args:
        heartrate_data: DataFrame with a ``device_id`` column and a
            ``fitbit_data`` column holding one JSON document (text) per row.
        HOUR2EPOCH: sequence indexed by hour of day, forwarded to the
            intraday parser to label each row's day segment.

    Returns:
        A ``(summary, intraday)`` pair of DataFrames using
        ``HR_SUMMARY_COLUMNS`` and ``HR_INTRADAY_COLUMNS``. Both are empty
        (but carry their columns) when ``heartrate_data`` is empty.
    """
    if heartrate_data.empty:
        # Must match the pair returned on the non-empty path. The previous
        # code returned a single frame built from HR_COLUMNS, a name this
        # module does not define.
        return pd.DataFrame(columns=HR_SUMMARY_COLUMNS), pd.DataFrame(columns=HR_INTRADAY_COLUMNS)

    # Every row is tagged with the device id found on the first input row.
    device_id = heartrate_data["device_id"].iloc[0]
    records_summary, records_intraday = [], []

    # Zone boundaries are taken once, from the first record.
    heartrate_zones_range = parseHeartrateZones(heartrate_data)

    # Parse JSON into individual records
    for record in heartrate_data.fitbit_data:
        record = json.loads(record)  # Parse text into JSON
        record_summary = record["activities-heart"][0]
        curr_date = datetime.strptime(record_summary["dateTime"], "%Y-%m-%d")

        records_summary.append(parseHeartrateSummaryData(record_summary, device_id, curr_date))

        dataset = record["activities-heart-intraday"]["dataset"]
        records_intraday = parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range, HOUR2EPOCH)

    return pd.DataFrame(data=records_summary, columns=HR_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS)
|
|
@ -0,0 +1,109 @@
|
||||||
|
import json
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
# Sleep API v1 encodes levels as 1, 2, 3; index (code - 1) gives the label.
SLEEP_CODE2LEVEL = ["asleep", "restless", "awake"]


# Summary columns common to both supported layouts (API v1.2 subset).
SLEEP_SUMMARY_COLUMNS_V1_2 = (
    "device_id", "efficiency",
    "minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
    "is_main_sleep", "type",
    "local_start_date_time", "local_end_date_time",
    "local_start_date", "local_end_date",
    "local_start_day_segment", "local_end_day_segment",
)

# API v1 summaries carry five additional counters after the shared columns.
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + (
    "count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless",
)

SLEEP_INTRADAY_COLUMNS = (
    "device_id",
    # For "classic" type, original_level is one of {"awake", "restless", "asleep"}
    # For "stages" type, original_level is one of {"wake", "deep", "light", "rem"}
    "original_level",
    # For "classic" type, unified_level is one of {0, 1} where 0: awake {"awake" + "restless"}, 1: asleep {"asleep"}
    # For "stages" type, unified_level is one of {0, 1} where 0: awake {"wake"}, 1: asleep {"deep" + "light" + "rem"}
    "unified_level",
    # one of {0, 1} where 0: nap, 1: main sleep
    "is_main_sleep",
    # one of {"classic", "stages"}
    "type",
    "local_date_time", "local_date", "local_month", "local_day",
    "local_day_of_week", "local_time", "local_hour", "local_minute",
    "local_day_segment",
)
|
||||||
|
|
||||||
|
# Parse one record for sleep API version 1
|
||||||
|
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH):
    """Parse one sleep episode in the API v1 ("classic") layout.

    Appends one summary row and one intraday row per ``minuteData`` entry to
    the lists passed in.

    Args:
        record: dict for one sleep episode (``startTime``, ``endTime``,
            ``minuteData`` and the summary counters read below).
        device_id: value stored in the first position of every row.
        d_is_main_sleep: 1 for main sleep, 0 for nap; copied into the rows.
        records_summary: list receiving the summary row (mutated in place).
        records_intraday: list receiving the intraday rows (mutated in place).
        HOUR2EPOCH: sequence indexed by hour of day giving the day segment.

    Returns:
        The ``(records_summary, records_intraday)`` lists.
    """
    # Summary data
    sleep_record_type = "classic"

    # "YYYY-MM-DDTHH:MM:SS" is 19 characters long; anything after it is
    # ignored. Slicing only 18 dropped the last digit of the seconds, which
    # strptime silently accepted as a one-digit seconds value.
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")

    row_summary = (device_id, record["efficiency"],
                   record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
                   d_is_main_sleep, sleep_record_type,
                   d_start_datetime, d_end_datetime,
                   d_start_datetime.date(), d_end_datetime.date(),
                   HOUR2EPOCH[d_start_datetime.hour], HOUR2EPOCH[d_end_datetime.hour],
                   record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
                   record["restlessCount"], record["restlessDuration"])
    records_summary.append(row_summary)

    # Intraday data
    end_date = d_end_datetime.date()
    is_before_midnight = True
    curr_date = d_start_datetime.date()
    for data in record["minuteData"]:
        d_time = datetime.strptime(data["dateTime"], '%H:%M:%S').time()
        # For overnight episodes, use end_date once we are over midnight.
        # Minute entries only carry a time of day, so the first entry in
        # hour 0 marks the switch; the flag is cleared so it happens once.
        if is_before_midnight and d_time.hour == 0:
            curr_date = end_date
            is_before_midnight = False
        d_datetime = datetime.combine(curr_date, d_time)

        # API 1.2 stores original_level as strings, so we convert
        # original_levels of API 1 to strings too
        # (1: "asleep", 2: "restless", 3: "awake")
        d_original_level = SLEEP_CODE2LEVEL[int(data["value"]) - 1]

        # unified_level summarises original_level (project-defined
        # classification): 0 is awake {"awake" + "restless"}, 1 is asleep
        d_unified_level = 0 if d_original_level in ("awake", "restless") else 1

        row_intraday = (device_id,
                        d_original_level, d_unified_level, d_is_main_sleep, sleep_record_type,
                        d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                        d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                        HOUR2EPOCH[d_datetime.hour])
        records_intraday.append(row_intraday)

    return records_summary, records_intraday
|
||||||
|
|
||||||
|
# Parse one record for sleep API version 1.2
|
||||||
|
def parseOneRecordForV12(record, d_is_main_sleep, records_summary, records_intraday):
    """Placeholder for parsing one sleep record of API version 1.2.

    Not implemented: all arguments are ignored and ``None`` is returned.
    """
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def parseSleepData(sleep_data, HOUR2EPOCH):
    """Parse raw Fitbit sleep records into summary and intraday tables.

    Only the API v1 layout (records containing ``awakeCount``) is supported.

    Args:
        sleep_data: DataFrame with a ``device_id`` column and a
            ``fitbit_data`` column holding JSON text with a ``sleep`` list.
        HOUR2EPOCH: sequence indexed by hour of day giving the day segment.

    Returns:
        A ``(summary, intraday)`` pair of DataFrames using
        ``SLEEP_SUMMARY_COLUMNS_V1`` and ``SLEEP_INTRADAY_COLUMNS``.

    Raises:
        ValueError: if a sleep record is not in the API v1 layout.
    """
    if sleep_data.empty:
        return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS_V1), pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)

    # Every row is tagged with the device id found on the first input row.
    device_id = sleep_data["device_id"].iloc[0]
    records_summary, records_intraday = [], []

    # Bound before the loop so the return below also works when no row
    # contains a sleep episode. Only v1 records are ever parsed, so the v1
    # column set is the only one that can apply.
    summary_columns = SLEEP_SUMMARY_COLUMNS_V1

    # Parse JSON into individual records
    for multi_record in sleep_data.fitbit_data:
        for record in json.loads(multi_record)["sleep"]:
            # Whether the sleep episode is nap (0) or main sleep (1)
            d_is_main_sleep = 1 if record["isMainSleep"] else 0

            # Records without "awakeCount" are treated as sleep API version 1.2
            if "awakeCount" not in record:
                raise ValueError("Sleep data for API v1.2 is not supported yet.")

            # For sleep API version 1
            records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH)

    return pd.DataFrame(data=records_summary, columns=summary_columns), pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
|
|
@ -0,0 +1,35 @@
|
||||||
|
import json
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Column layout of the intraday steps table built by parseStepsData.
STEPS_INTRADAY_COLUMNS = ("device_id",
                          "steps",
                          "local_date_time", "local_date", "local_month", "local_day",
                          "local_day_of_week", "local_time", "local_hour", "local_minute",
                          "local_day_segment")


def parseStepsData(steps_data, HOUR2EPOCH):
    """Flatten raw Fitbit steps JSON into one row per intraday sample.

    Args:
        steps_data: DataFrame with a ``device_id`` column and a
            ``fitbit_data`` column holding one JSON document (text) per row.
        HOUR2EPOCH: sequence indexed by hour of day; the looked-up value is
            stored in ``local_day_segment``.

    Returns:
        A ``(summary, intraday)`` pair of DataFrames. The summary frame is
        always empty in this parser; the intraday frame uses
        ``STEPS_INTRADAY_COLUMNS``.
    """
    if steps_data.empty:
        # This module only defines STEPS_INTRADAY_COLUMNS; the previous
        # reference to STEPS_COLUMNS raised NameError on empty input.
        return pd.DataFrame(), pd.DataFrame(columns=STEPS_INTRADAY_COLUMNS)

    # Every row is tagged with the device id found on the first input row.
    device_id = steps_data["device_id"].iloc[0]
    records_intraday = []

    # Parse JSON into individual records
    for record in steps_data.fitbit_data:
        record = json.loads(record)  # Parse text into JSON
        curr_date = datetime.strptime(
            record["activities-steps"][0]["dateTime"], "%Y-%m-%d")
        dataset = record["activities-steps-intraday"]["dataset"]
        for data in dataset:
            # Samples only carry a time of day; attach the record's date.
            d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
            d_datetime = datetime.combine(curr_date, d_time)

            row_intraday = (device_id,
                            data["value"],
                            d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                            d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                            HOUR2EPOCH[d_datetime.hour])
            records_intraday.append(row_intraday)

    return pd.DataFrame(), pd.DataFrame(data=records_intraday, columns=STEPS_INTRADAY_COLUMNS)
|
|
@ -1,6 +1,10 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytz, json
|
import pytz, json
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from fitbit_parse_sensors.fitbit_parse_heartrate import parseHeartrateData
|
||||||
|
from fitbit_parse_sensors.fitbit_parse_sleep import parseSleepData
|
||||||
|
from fitbit_parse_sensors.fitbit_parse_steps import parseStepsData
|
||||||
|
from fitbit_parse_sensors.fitbit_parse_calories import parseCaloriesData
|
||||||
|
|
||||||
|
|
||||||
NIGHT = "night"
|
NIGHT = "night"
|
||||||
|
@ -10,30 +14,6 @@ EVENING = "evening"
|
||||||
HOUR2EPOCH = [NIGHT] * 6 + [MORNING] * 6 + [AFTERNOON] * 6 + [EVENING] * 6
|
HOUR2EPOCH = [NIGHT] * 6 + [MORNING] * 6 + [AFTERNOON] * 6 + [EVENING] * 6
|
||||||
|
|
||||||
|
|
||||||
HR_COLUMNS = ("device_id",
|
|
||||||
"heartrate", "heartrate_zone",
|
|
||||||
"local_date_time", "local_date", "local_month", "local_day",
|
|
||||||
"local_day_of_week", "local_time", "local_hour", "local_minute",
|
|
||||||
"local_day_segment")
|
|
||||||
|
|
||||||
SLEEP_COLUMNS = ("device_id",
|
|
||||||
"sleep", # 1: "asleep", 2: "restless", or 3: "awake"
|
|
||||||
"local_date_time", "local_date", "local_month", "local_day",
|
|
||||||
"local_day_of_week", "local_time", "local_hour", "local_minute",
|
|
||||||
"local_day_segment")
|
|
||||||
|
|
||||||
STEPS_COLUMNS = ("device_id",
|
|
||||||
"steps",
|
|
||||||
"local_date_time", "local_date", "local_month", "local_day",
|
|
||||||
"local_day_of_week", "local_time", "local_hour", "local_minute",
|
|
||||||
"local_day_segment")
|
|
||||||
|
|
||||||
CALORIES_COLUMNS = ("device_id",
|
|
||||||
"level", "mets", "value",
|
|
||||||
"local_date_time", "local_date", "local_month", "local_day",
|
|
||||||
"local_day_of_week", "local_time", "local_hour", "local_minute",
|
|
||||||
"local_day_segment")
|
|
||||||
|
|
||||||
def drop_duplicates(data, local_timezone):
|
def drop_duplicates(data, local_timezone):
|
||||||
"""
|
"""
|
||||||
Data is pulled in intraday manner. Since data will be duplicated until the
|
Data is pulled in intraday manner. Since data will be duplicated until the
|
||||||
|
@ -47,160 +27,6 @@ def drop_duplicates(data, local_timezone):
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def parse_steps_data(steps_data):
|
|
||||||
if steps_data.empty:
|
|
||||||
return pd.DataFrame(columns=STEPS_COLUMNS)
|
|
||||||
device_id = steps_data["device_id"].iloc[0]
|
|
||||||
records = []
|
|
||||||
# Parse JSON into individual records
|
|
||||||
for record in steps_data.fitbit_data:
|
|
||||||
record = json.loads(record) # Parse text into JSON
|
|
||||||
curr_date = datetime.strptime(
|
|
||||||
record["activities-steps"][0]["dateTime"], "%Y-%m-%d")
|
|
||||||
dataset = record["activities-steps-intraday"]["dataset"]
|
|
||||||
for data in dataset:
|
|
||||||
d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
|
|
||||||
d_datetime = datetime.combine(curr_date, d_time)
|
|
||||||
|
|
||||||
row = (device_id,
|
|
||||||
data["value"],
|
|
||||||
d_datetime,
|
|
||||||
d_datetime.date(),
|
|
||||||
d_datetime.month,
|
|
||||||
d_datetime.day,
|
|
||||||
d_datetime.weekday(),
|
|
||||||
d_datetime.time(),
|
|
||||||
d_datetime.hour,
|
|
||||||
d_datetime.minute,
|
|
||||||
HOUR2EPOCH[d_datetime.hour])
|
|
||||||
|
|
||||||
records.append(row)
|
|
||||||
|
|
||||||
return pd.DataFrame(data=records, columns=STEPS_COLUMNS)
|
|
||||||
|
|
||||||
def parse_sleep_data(sleep_data):
|
|
||||||
if sleep_data.empty:
|
|
||||||
return pd.DataFrame(columns=SLEEP_COLUMNS)
|
|
||||||
device_id = sleep_data["device_id"].iloc[0]
|
|
||||||
records = []
|
|
||||||
# Parse JSON into individual records
|
|
||||||
for multi_record in sleep_data.fitbit_data:
|
|
||||||
for record in json.loads(multi_record)["sleep"]:
|
|
||||||
|
|
||||||
# Compute date when sleep episodes span two days
|
|
||||||
start_date = datetime.strptime(record["startTime"][:10], "%Y-%m-%d")
|
|
||||||
end_date = datetime.strptime(record["endTime"][:10], "%Y-%m-%d")
|
|
||||||
flag = 1 if start_date == end_date else 0
|
|
||||||
for data in record["minuteData"]:
|
|
||||||
d_time = datetime.strptime(data["dateTime"], '%H:%M:%S').time()
|
|
||||||
if not flag and not d_time.hour:
|
|
||||||
flag = 1
|
|
||||||
curr_date = end_date if flag else start_date
|
|
||||||
d_datetime = datetime.combine(curr_date, d_time)
|
|
||||||
|
|
||||||
row = (device_id,
|
|
||||||
data["value"],
|
|
||||||
d_datetime,
|
|
||||||
d_datetime.date(),
|
|
||||||
d_datetime.month,
|
|
||||||
d_datetime.day,
|
|
||||||
d_datetime.weekday(),
|
|
||||||
d_datetime.time(),
|
|
||||||
d_datetime.hour,
|
|
||||||
d_datetime.minute,
|
|
||||||
HOUR2EPOCH[d_datetime.hour])
|
|
||||||
|
|
||||||
records.append(row)
|
|
||||||
|
|
||||||
return pd.DataFrame(data=records, columns=SLEEP_COLUMNS)
|
|
||||||
|
|
||||||
def parse_heartrate_data(heartrate_data):
|
|
||||||
if heartrate_data.empty:
|
|
||||||
return pd.DataFrame(columns=HR_COLUMNS)
|
|
||||||
device_id = heartrate_data["device_id"].iloc[0]
|
|
||||||
records = []
|
|
||||||
|
|
||||||
# Get the range of heartrate zones: outofrange, fatburn, cardio, peak
|
|
||||||
# refer to: https://help.fitbit.com/articles/en_US/Help_article/1565
|
|
||||||
|
|
||||||
heartrate_fitbit_data = json.loads(heartrate_data["fitbit_data"].iloc[0])["activities-heart"][0]
|
|
||||||
if "heartRateZones" in heartrate_fitbit_data:
|
|
||||||
heartrate_zones = heartrate_fitbit_data["heartRateZones"]
|
|
||||||
elif "value" in heartrate_fitbit_data:
|
|
||||||
heartrate_zones = heartrate_fitbit_data["value"]["heartRateZones"]
|
|
||||||
else:
|
|
||||||
raise ValueError("Please check the format of fitbit heartrate raw data.")
|
|
||||||
|
|
||||||
heartrate_zones_range = {}
|
|
||||||
for hrzone in heartrate_zones:
|
|
||||||
heartrate_zones_range[hrzone["name"].lower().replace(" ", "")] = [hrzone["min"], hrzone["max"]]
|
|
||||||
|
|
||||||
# Parse JSON into individual records
|
|
||||||
for record in heartrate_data.fitbit_data:
|
|
||||||
record = json.loads(record) # Parse text into JSON
|
|
||||||
curr_date = datetime.strptime(record["activities-heart"][0]["dateTime"], "%Y-%m-%d")
|
|
||||||
dataset = record["activities-heart-intraday"]["dataset"]
|
|
||||||
for data in dataset:
|
|
||||||
d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
|
|
||||||
d_datetime = datetime.combine(curr_date, d_time)
|
|
||||||
d_hr = data["value"]
|
|
||||||
|
|
||||||
# Get heartrate zone by range: min <= heartrate < max
|
|
||||||
d_hrzone = None
|
|
||||||
for hrzone, hrrange in heartrate_zones_range.items():
|
|
||||||
if d_hr >= hrrange[0] and d_hr < hrrange[1]:
|
|
||||||
d_hrzone = hrzone
|
|
||||||
break
|
|
||||||
|
|
||||||
row = (device_id,
|
|
||||||
d_hr,
|
|
||||||
d_hrzone,
|
|
||||||
d_datetime,
|
|
||||||
d_datetime.date(),
|
|
||||||
d_datetime.month,
|
|
||||||
d_datetime.day,
|
|
||||||
d_datetime.weekday(),
|
|
||||||
d_datetime.time(),
|
|
||||||
d_datetime.hour,
|
|
||||||
d_datetime.minute,
|
|
||||||
HOUR2EPOCH[d_datetime.hour])
|
|
||||||
|
|
||||||
records.append(row)
|
|
||||||
|
|
||||||
return pd.DataFrame(data=records, columns=HR_COLUMNS)
|
|
||||||
|
|
||||||
def parse_calories_data(calories_data):
|
|
||||||
if calories_data.empty:
|
|
||||||
return pd.DataFrame(columns=CALORIES_COLUMNS)
|
|
||||||
device_id = calories_data["device_id"].iloc[0]
|
|
||||||
records = []
|
|
||||||
# Parse JSON into individual records
|
|
||||||
for record in calories_data.fitbit_data:
|
|
||||||
record = json.loads(record) # Parse text into JSON
|
|
||||||
curr_date = datetime.strptime(
|
|
||||||
record["activities-calories"][0]["dateTime"], "%Y-%m-%d")
|
|
||||||
dataset = record["activities-calories-intraday"]["dataset"]
|
|
||||||
for data in dataset:
|
|
||||||
d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
|
|
||||||
d_datetime = datetime.combine(curr_date, d_time)
|
|
||||||
|
|
||||||
row = (device_id,
|
|
||||||
data["level"],
|
|
||||||
data["mets"],
|
|
||||||
data["value"],
|
|
||||||
d_datetime,
|
|
||||||
d_datetime.date(),
|
|
||||||
d_datetime.month,
|
|
||||||
d_datetime.day,
|
|
||||||
d_datetime.weekday(),
|
|
||||||
d_datetime.time(),
|
|
||||||
d_datetime.hour,
|
|
||||||
d_datetime.minute,
|
|
||||||
HOUR2EPOCH[d_datetime.hour])
|
|
||||||
|
|
||||||
records.append(row)
|
|
||||||
|
|
||||||
return pd.DataFrame(data=records, columns=CALORIES_COLUMNS)
|
|
||||||
|
|
||||||
|
|
||||||
fitbit_data = pd.read_csv(snakemake.input[0])
|
fitbit_data = pd.read_csv(snakemake.input[0])
|
||||||
|
@ -211,14 +37,16 @@ data = fitbit_data[fitbit_data["fitbit_data_type"] == sensor]
|
||||||
data = drop_duplicates(data, local_timezone)
|
data = drop_duplicates(data, local_timezone)
|
||||||
|
|
||||||
if sensor == "heartrate":
|
if sensor == "heartrate":
|
||||||
data_preprocesed = parse_heartrate_data(data)
|
summary_data, intraday_data = parseHeartrateData(data, HOUR2EPOCH)
|
||||||
elif sensor == "sleep":
|
elif sensor == "sleep":
|
||||||
data_preprocesed = parse_sleep_data(data)
|
summary_data, intraday_data = parseSleepData(data, HOUR2EPOCH)
|
||||||
elif sensor == "steps":
|
elif sensor == "steps":
|
||||||
data_preprocesed = parse_steps_data(data)
|
summary_data, intraday_data = parseStepsData(data, HOUR2EPOCH)
|
||||||
elif sensor == "calories":
|
elif sensor == "calories":
|
||||||
data_preprocesed = parse_calories_data(data)
|
summary_data, intraday_data = parseCaloriesData(data, HOUR2EPOCH)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Please check the FITBIT_SENSORS list in config.yaml file.")
|
raise ValueError("Please check the FITBIT_SENSORS list in config.yaml file.")
|
||||||
|
|
||||||
data_preprocesed.to_csv(snakemake.output[0], index=False)
|
# Summary data will be empty for steps and calories as it is not provided by Fitbit's API
|
||||||
|
summary_data.to_csv(snakemake.output["summary_data"], index=False)
|
||||||
|
intraday_data.to_csv(snakemake.output["intraday_data"], index=False)
|
||||||
|
|
|
@ -4,47 +4,75 @@ from scipy.stats import entropy
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
heartrate_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
|
def extractHRFeaturesFromSummaryData(heartrate_summary_data, daily_features_from_summary_data):
|
||||||
day_segment = snakemake.params["day_segment"]
|
heartrate_summary_features = pd.DataFrame()
|
||||||
features = snakemake.params["features"]
|
if "restinghr" in daily_features_from_summary_data:
|
||||||
|
heartrate_summary_features["heartrate_daily_restinghr"] = heartrate_summary_data["heartrate_daily_restinghr"]
|
||||||
|
# calories features might be inaccurate: they depend on users' fitbit profile (weight, height, etc.)
|
||||||
|
if "caloriesoutofrange" in daily_features_from_summary_data:
|
||||||
|
heartrate_summary_features["heartrate_daily_caloriesoutofrange"] = heartrate_summary_data["heartrate_daily_caloriesoutofrange"]
|
||||||
|
if "caloriesfatburn" in daily_features_from_summary_data:
|
||||||
|
heartrate_summary_features["heartrate_daily_caloriesfatburn"] = heartrate_summary_data["heartrate_daily_caloriesfatburn"]
|
||||||
|
if "caloriescardio" in daily_features_from_summary_data:
|
||||||
|
heartrate_summary_features["heartrate_daily_caloriescardio"] = heartrate_summary_data["heartrate_daily_caloriescardio"]
|
||||||
|
if "caloriespeak" in daily_features_from_summary_data:
|
||||||
|
heartrate_summary_features["heartrate_daily_caloriespeak"] = heartrate_summary_data["heartrate_daily_caloriespeak"]
|
||||||
|
heartrate_summary_features.reset_index(inplace=True)
|
||||||
|
|
||||||
|
return heartrate_summary_features
|
||||||
|
|
||||||
heartrate_features = pd.DataFrame(columns=["local_date"] + ["heartrate_" + day_segment + "_" + x for x in features])
|
def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features):
|
||||||
if not heartrate_data.empty:
|
heartrate_intraday_features = pd.DataFrame(columns=["local_date"] + ["heartrate_" + day_segment + "_" + x for x in features])
|
||||||
device_id = heartrate_data["device_id"][0]
|
if not heartrate_intraday_data.empty:
|
||||||
num_rows_per_minute = heartrate_data.groupby(["local_date", "local_hour", "local_minute"]).count().mean()["device_id"]
|
device_id = heartrate_intraday_data["device_id"][0]
|
||||||
|
num_rows_per_minute = heartrate_intraday_data.groupby(["local_date", "local_hour", "local_minute"]).count().mean()["device_id"]
|
||||||
if day_segment != "daily":
|
if day_segment != "daily":
|
||||||
heartrate_data =heartrate_data[heartrate_data["local_day_segment"] == day_segment]
|
heartrate_intraday_data = heartrate_intraday_data[heartrate_intraday_data["local_day_segment"] == day_segment]
|
||||||
|
|
||||||
if not heartrate_data.empty:
|
if not heartrate_intraday_data.empty:
|
||||||
heartrate_features = pd.DataFrame()
|
heartrate_intraday_features = pd.DataFrame()
|
||||||
|
|
||||||
# get stats of heartrate
|
# get stats of heartrate
|
||||||
if "maxhr" in features:
|
if "maxhr" in features:
|
||||||
heartrate_features["heartrate_" + day_segment + "_maxhr"] = heartrate_data.groupby(["local_date"])["heartrate"].max()
|
heartrate_intraday_features["heartrate_" + day_segment + "_maxhr"] = heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].max()
|
||||||
if "minhr" in features:
|
if "minhr" in features:
|
||||||
heartrate_features["heartrate_" + day_segment + "_minhr"] = heartrate_data.groupby(["local_date"])["heartrate"].min()
|
heartrate_intraday_features["heartrate_" + day_segment + "_minhr"] = heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].min()
|
||||||
if "avghr" in features:
|
if "avghr" in features:
|
||||||
heartrate_features["heartrate_" + day_segment + "_avghr"] = heartrate_data.groupby(["local_date"])["heartrate"].mean()
|
heartrate_intraday_features["heartrate_" + day_segment + "_avghr"] = heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].mean()
|
||||||
if "medianhr" in features:
|
if "medianhr" in features:
|
||||||
heartrate_features["heartrate_" + day_segment + "_medianhr"] = heartrate_data.groupby(["local_date"])["heartrate"].median()
|
heartrate_intraday_features["heartrate_" + day_segment + "_medianhr"] = heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].median()
|
||||||
if "modehr" in features:
|
if "modehr" in features:
|
||||||
heartrate_features["heartrate_" + day_segment + "_modehr"] = heartrate_data.groupby(["local_date"])["heartrate"].agg(lambda x: pd.Series.mode(x)[0])
|
heartrate_intraday_features["heartrate_" + day_segment + "_modehr"] = heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].agg(lambda x: pd.Series.mode(x)[0])
|
||||||
if "stdhr" in features:
|
if "stdhr" in features:
|
||||||
heartrate_features["heartrate_" + day_segment + "_stdhr"] = heartrate_data.groupby(["local_date"])["heartrate"].std()
|
heartrate_intraday_features["heartrate_" + day_segment + "_stdhr"] = heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].std()
|
||||||
if "diffmaxmodehr" in features:
|
if "diffmaxmodehr" in features:
|
||||||
heartrate_features["heartrate_" + day_segment + "_diffmaxmodehr"] = heartrate_data.groupby(["local_date"])["heartrate"].max() - heartrate_data.groupby(["local_date"])["heartrate"].agg(lambda x: pd.Series.mode(x)[0])
|
heartrate_intraday_features["heartrate_" + day_segment + "_diffmaxmodehr"] = heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].max() - heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].agg(lambda x: pd.Series.mode(x)[0])
|
||||||
if "diffminmodehr" in features:
|
if "diffminmodehr" in features:
|
||||||
heartrate_features["heartrate_" + day_segment + "_diffminmodehr"] = heartrate_data.groupby(["local_date"])["heartrate"].agg(lambda x: pd.Series.mode(x)[0]) - heartrate_data.groupby(["local_date"])["heartrate"].min()
|
heartrate_intraday_features["heartrate_" + day_segment + "_diffminmodehr"] = heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].agg(lambda x: pd.Series.mode(x)[0]) - heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].min()
|
||||||
if "entropyhr" in features:
|
if "entropyhr" in features:
|
||||||
heartrate_features["heartrate_" + day_segment + "_entropyhr"] = heartrate_data.groupby(["local_date"])["heartrate"].agg(entropy)
|
heartrate_intraday_features["heartrate_" + day_segment + "_entropyhr"] = heartrate_intraday_data[["local_date", "heartrate"]].groupby(["local_date"])["heartrate"].agg(entropy)
|
||||||
|
|
||||||
# get number of minutes in each heart rate zone
|
# get number of minutes in each heart rate zone
|
||||||
for feature_name in list(set(["lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]) & set(features)):
|
for feature_name in list(set(["lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]) & set(features)):
|
||||||
heartrate_zone = heartrate_data[heartrate_data["heartrate_zone"] == feature_name[6:]]
|
heartrate_zone = heartrate_intraday_data[heartrate_intraday_data["heartrate_zone"] == feature_name[6:]]
|
||||||
heartrate_features["heartrate_" + day_segment + "_" + feature_name] = heartrate_zone.groupby(["local_date"])["device_id"].count() / num_rows_per_minute
|
heartrate_intraday_features["heartrate_" + day_segment + "_" + feature_name] = heartrate_zone.groupby(["local_date"])["device_id"].count() / num_rows_per_minute
|
||||||
heartrate_features.fillna(value={"heartrate_" + day_segment + "_" + feature_name: 0}, inplace=True)
|
heartrate_intraday_features.fillna(value={"heartrate_" + day_segment + "_" + feature_name: 0}, inplace=True)
|
||||||
|
heartrate_intraday_features.reset_index(inplace=True)
|
||||||
|
|
||||||
heartrate_features = heartrate_features.reset_index()
|
return heartrate_intraday_features
|
||||||
|
|
||||||
|
|
||||||
|
heartrate_summary_data = pd.read_csv(snakemake.input["heartrate_summary_data"], index_col=["local_date"], parse_dates=["local_date"])
|
||||||
|
heartrate_intraday_data = pd.read_csv(snakemake.input["heartrate_intraday_data"], parse_dates=["local_date_time", "local_date"])
|
||||||
|
day_segment = snakemake.params["day_segment"]
|
||||||
|
features = snakemake.params["features"]
|
||||||
|
daily_features_from_summary_data = snakemake.params["daily_features_from_summary_data"]
|
||||||
|
|
||||||
|
heartrate_intraday_features = extractHRFeaturesFromIntradayData(heartrate_intraday_data, features)
|
||||||
|
if not heartrate_summary_data.empty and day_segment == "daily" and daily_features_from_summary_data != []:
|
||||||
|
heartrate_summary_features = extractHRFeaturesFromSummaryData(heartrate_summary_data, daily_features_from_summary_data)
|
||||||
|
heartrate_features = heartrate_intraday_features.merge(heartrate_summary_features, on=["local_date"], how="outer")
|
||||||
|
else:
|
||||||
|
heartrate_features = heartrate_intraday_features
|
||||||
|
|
||||||
heartrate_features.to_csv(snakemake.output[0], index=False)
|
heartrate_features.to_csv(snakemake.output[0], index=False)
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
import pandas as pd
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def dailyFeaturesFromSummaryData(sleep_summary_data, sleep_type):
    """Add the daily sleep features of one sleep type to the feature table.

    The summary rows are filtered by ``sleep_type`` and grouped by
    ``local_end_date`` (exposed as ``local_date`` in the result index).
    Only features named in the module-level list
    ``daily_features_from_summary_data`` are computed, and each one is stored
    in the module-level DataFrame ``sleep_daily_features`` under the column
    ``"sleep_daily_" + feature + sleep_type``.

    Args:
        sleep_summary_data: DataFrame of sleep episodes with the columns
            ``is_main_sleep``, ``minutes_after_wakeup``, ``minutes_asleep``,
            ``minutes_awake``, ``minutes_to_fall_asleep``, ``minutes_in_bed``,
            ``efficiency``, ``local_start_date_time`` and ``local_end_date``.
        sleep_type: ``"main"`` (rows with ``is_main_sleep == 1``), ``"nap"``
            (rows with ``is_main_sleep == 0``) or ``"all"`` (no filtering).

    Returns:
        The module-level ``sleep_daily_features`` DataFrame, updated in place.

    Raises:
        ValueError: if ``sleep_type`` is not one of the three accepted values.
    """
    if sleep_type == "main":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 1]
    elif sleep_type == "nap":
        sleep_summary_data = sleep_summary_data[sleep_summary_data["is_main_sleep"] == 0]
    elif sleep_type != "all":
        raise ValueError("sleep_type can only be one of ['main', 'nap', 'all'].")

    # NOTE(review): column assignment aligns on the index sleep_daily_features
    # already has, so dates that are missing from that index are presumably
    # not added by later sleep types -- confirm against the caller's loop.

    # Feature name -> summary column that is summed per day.
    summed_columns = {
        "sumdurationafterwakeup": "minutes_after_wakeup",
        "sumdurationasleep": "minutes_asleep",
        "sumdurationawake": "minutes_awake",
        "sumdurationtofallasleep": "minutes_to_fall_asleep",
        "sumdurationinbed": "minutes_in_bed",
    }
    features_sum = sleep_summary_data[list(summed_columns.values()) + ["local_end_date"]].groupby(["local_end_date"]).sum()
    features_sum.index.rename("local_date", inplace=True)
    for feature, source_column in summed_columns.items():
        if feature in daily_features_from_summary_data:
            sleep_daily_features["sleep_daily_" + feature + sleep_type] = features_sum[source_column]

    features_avg = sleep_summary_data[["efficiency", "local_end_date"]].groupby(["local_end_date"]).mean()
    features_avg.index.rename("local_date", inplace=True)
    if "avgefficiency" in daily_features_from_summary_data:
        sleep_daily_features["sleep_daily_avgefficiency" + sleep_type] = features_avg["efficiency"]

    features_count = sleep_summary_data[["local_start_date_time", "local_end_date"]].groupby(["local_end_date"]).count()
    features_count.index.rename("local_date", inplace=True)
    if "countepisode" in daily_features_from_summary_data:
        # The column must carry the full feature name ("countepisode"), like
        # every other feature, so it matches the "sleep_daily_<feature><type>"
        # columns the feature table is declared with.
        sleep_daily_features["sleep_daily_countepisode" + sleep_type] = features_count["local_start_date_time"]

    return sleep_daily_features
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Script body: compute daily sleep features from the Fitbit sleep summary CSV.
# `snakemake` is not defined in this file; it is expected to be provided by
# the workflow runner that executes the script.
sleep_summary_data = pd.read_csv(snakemake.input["sleep_summary_data"])
sleep_types = snakemake.params["sleep_types"]
daily_features_from_summary_data = snakemake.params["daily_features_from_summary_data"]
day_segment = snakemake.params["day_segment"]

# The exception has to be raised (not merely constructed); otherwise an
# unsupported day segment ends the script silently without any output file.
if day_segment != "daily":
    raise ValueError("Sleep summary features are only implemented for daily day segments")

# Every feature except avgefficiency is zero-filled further down, so missing
# avgefficiency values stay NaN.
daily_features_can_be_zero = [feature for feature in daily_features_from_summary_data if feature != "avgefficiency"]
colnames_can_be_zero = ["sleep_daily_" + feature + stype
                        for feature, stype in itertools.product(daily_features_can_be_zero, sleep_types)]

# One output column per (feature, sleep type) pair.
colnames = ["sleep_daily_" + feature + stype
            for feature, stype in itertools.product(daily_features_from_summary_data, sleep_types)]

if sleep_summary_data.empty:
    # Keep "local_date" as the (empty) index so the CSV header is
    # "local_date,<features...>", the same layout the non-empty branch writes,
    # instead of gaining an extra unnamed index column.
    sleep_daily_features = pd.DataFrame(columns=colnames, index=pd.Index([], name="local_date"))
else:
    sleep_daily_features = pd.DataFrame(columns=colnames)
    for sleep_type in sleep_types:
        sleep_daily_features = dailyFeaturesFromSummaryData(sleep_summary_data, sleep_type)

    if colnames_can_be_zero:
        sleep_daily_features[colnames_can_be_zero] = sleep_daily_features[colnames_can_be_zero].fillna(0)

sleep_daily_features.to_csv(snakemake.output[0])
|
Loading…
Reference in New Issue