rapids/src/data/fitbit_parse_heartrate.py

133 lines
5.5 KiB
Python
Raw Normal View History

2020-10-22 19:08:52 +02:00
import json, sys
import pandas as pd
2020-10-22 19:08:52 +02:00
from datetime import datetime, timezone
from math import trunc
# Column order of the daily heart rate summary table. Summary rows are plain
# tuples, so their field order must match this tuple exactly.
HR_SUMMARY_COLUMNS = (
    "device_id",
    "local_date_time",
    "timestamp",
    "heartrate_daily_restinghr",
    "heartrate_daily_caloriesoutofrange",
    "heartrate_daily_caloriesfatburn",
    "heartrate_daily_caloriescardio",
    "heartrate_daily_caloriespeak",
)
# Column order of the intraday heart rate table. Intraday rows are plain
# tuples, so their field order must match this tuple exactly.
HR_INTRADAY_COLUMNS = (
    "device_id",
    "heartrate",
    "heartrate_zone",
    "local_date_time",
    "timestamp",
)
def parseHeartrateZones(heartrate_data):
    """Return the [min, max] heart rate range of every zone in the first record.

    Only the first row of ``heartrate_data["fitbit_data"]`` is inspected; its
    JSON text is decoded and the first "activities-heart" entry is read.
    Zone names are lowercased and stripped of spaces before being used as keys.
    Refer to: https://help.fitbit.com/articles/en_US/Help_article/1565

    Args:
        heartrate_data: DataFrame with a "fitbit_data" column of JSON strings.

    Returns:
        dict mapping the normalised zone name to ``[min, max]``.

    Raises:
        ValueError: if the entry has neither a "heartRateZones" nor a "value" key.
    """
    first_day = json.loads(heartrate_data["fitbit_data"].iloc[0])["activities-heart"][0]

    # Two payload layouts are supported; the exact Fitbit API versions they
    # belong to are not known.
    if "heartRateZones" in first_day:
        # Layout X: zones sit directly on the daily entry.
        zones = first_day["heartRateZones"]
    elif "value" in first_day:
        # Layout Y: zones are nested under "value".
        zones = first_day["value"]["heartRateZones"]
    else:
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    return {
        zone["name"].lower().replace(" ", ""): [zone["min"], zone["max"]]
        for zone in zones
    }
def parseHeartrateSummaryData(record_summary, device_id, curr_date):
    """Build one daily summary row from a single "activities-heart" entry.

    Args:
        record_summary: dict for one day, in either supported payload layout.
        device_id: value copied into the first field of the row.
        curr_date: value copied into the "local_date_time" field of the row.

    Returns:
        Tuple of (device_id, curr_date, 0, resting heart rate, calories out of
        range, calories fat burn, calories cardio, calories peak). The third
        field is a placeholder 0 for the timestamp. The resting heart rate
        and the calorie fields are None when the payload does not provide them.

    Raises:
        ValueError: if the entry has neither a "heartRateZones" nor a "value" key.
    """
    # Two payload layouts are supported; the exact Fitbit API versions they
    # belong to are not known.
    if "heartRateZones" in record_summary:
        # Layout X: zones on the entry itself, resting heart rate under "value".
        heartrate_zones = record_summary["heartRateZones"]
        d_resting_heartrate = record_summary.get("value")
    elif "value" in record_summary:
        # Layout Y: both zones and resting heart rate nested under "value".
        heartrate_zones = record_summary["value"]["heartRateZones"]
        d_resting_heartrate = record_summary["value"].get("restingHeartRate")
    else:
        # The exception must be raised, not merely constructed; otherwise
        # execution continues with heartrate_zones unbound.
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    if "caloriesOut" in heartrate_zones[0]:
        # Zones are read by position: out of range, fat burn, cardio, peak.
        d_calories_outofrange = heartrate_zones[0]["caloriesOut"]
        d_calories_fatburn = heartrate_zones[1]["caloriesOut"]
        d_calories_cardio = heartrate_zones[2]["caloriesOut"]
        d_calories_peak = heartrate_zones[3]["caloriesOut"]
    else:
        d_calories_outofrange = d_calories_fatburn = d_calories_cardio = d_calories_peak = None

    return (
        device_id,
        curr_date,
        0,
        d_resting_heartrate,
        d_calories_outofrange,
        d_calories_fatburn,
        d_calories_cardio,
        d_calories_peak,
    )
2020-10-22 19:08:52 +02:00
def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range):
    """Append one intraday row per sample in ``dataset`` and return the list.

    Args:
        records_intraday: list that rows are appended to (mutated in place).
        dataset: iterable of dicts with a "time" ("%H:%M:%S") and a "value" key.
        device_id: value copied into the first field of every row.
        curr_date: date (or datetime) the sample times are combined with.
        heartrate_zones_range: dict mapping zone name to a [min, max] pair.

    Returns:
        The same ``records_intraday`` list, extended with tuples of
        (device_id, heartrate, zone, local datetime, 0). The last field is a
        placeholder 0 for the timestamp.
    """
    for sample in dataset:
        sample_time = datetime.strptime(sample["time"], "%H:%M:%S").time()
        local_date_time = datetime.combine(curr_date, sample_time)
        heartrate = sample["value"]

        # First zone whose range satisfies min <= heartrate < max; None when
        # the value falls outside every zone.
        zone = next(
            (
                name
                for name, bounds in heartrate_zones_range.items()
                if bounds[0] <= heartrate < bounds[1]
            ),
            None,
        )

        records_intraday.append((device_id, heartrate, zone, local_date_time, 0))

    return records_intraday
2020-10-22 19:08:52 +02:00
# def append_timestamp(data):
2020-10-22 19:08:52 +02:00
def parseHeartrateData(heartrate_data):
    """Parse raw Fitbit heart rate JSON rows into summary and intraday tables.

    Args:
        heartrate_data: DataFrame with a "device_id" column and a
            "fitbit_data" column holding one JSON string per row.

    Returns:
        Tuple ``(summary, intraday)`` of DataFrames whose columns are
        HR_SUMMARY_COLUMNS and HR_INTRADAY_COLUMNS. Both are empty when
        ``heartrate_data`` is empty.
    """
    if heartrate_data.empty:
        return pd.DataFrame(columns=HR_SUMMARY_COLUMNS), pd.DataFrame(columns=HR_INTRADAY_COLUMNS)

    # The device id and the zone ranges are taken from the first row only and
    # reused for every record.
    device_id = heartrate_data["device_id"].iloc[0]
    heartrate_zones_range = parseHeartrateZones(heartrate_data)

    records_summary, records_intraday = [], []

    # Parse JSON into individual records
    for raw_record in heartrate_data.fitbit_data:
        record = json.loads(raw_record)  # Parse text into JSON
        record_summary = record["activities-heart"][0]
        curr_date = datetime.strptime(record_summary["dateTime"], "%Y-%m-%d")

        records_summary.append(parseHeartrateSummaryData(record_summary, device_id, curr_date))

        dataset = record["activities-heart-intraday"]["dataset"]
        records_intraday = parseHeartrateIntradayData(
            records_intraday, dataset, device_id, curr_date, heartrate_zones_range
        )

    return (
        pd.DataFrame(data=records_summary, columns=HR_SUMMARY_COLUMNS),
        pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS),
    )
2020-10-22 19:08:52 +02:00
# Script entry point. `snakemake` is not defined in this file; presumably it is
# injected by Snakemake when this file is run through a `script:` directive.
table_format = snakemake.params["table_format"]

if table_format == "JSON":
    # Raw Fitbit JSON stored one document per row: parse it into both tables.
    json_raw = pd.read_csv(snakemake.input[0])
    summary, intraday = parseHeartrateData(json_raw)
elif table_format == "CSV":
    # Tables were already parsed upstream: the first input is the summary,
    # the second is the intraday data.
    summary = pd.read_csv(snakemake.input[0])
    intraday = pd.read_csv(snakemake.input[1])
else:
    # Fail with a clear message instead of a NameError on `summary` below.
    raise ValueError("Unsupported table_format {!r}, expected 'JSON' or 'CSV'".format(table_format))

# Milliseconds since 1970-01-01 derived from local_date_time. read_csv leaves
# that column as strings, so convert it for the arithmetic only; the column
# itself is written back unchanged.
# NOTE(review): assumes local_date_time values carry no UTC offset; tz-aware
# values cannot be subtracted from the naive epoch below. Confirm against the
# producer of the CSV inputs.
summary["timestamp"] = (pd.to_datetime(summary["local_date_time"]) - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (pd.to_datetime(intraday["local_date_time"]) - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000

summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)