Add hr intraday to fitbitjson_mysql

pull/128/head
JulioV 2021-03-10 13:31:56 -05:00
parent 47f449555a
commit 47e1b33816
8 changed files with 158 additions and 8 deletions

View File

@ -235,8 +235,7 @@ for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys():
for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
if config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_intraday.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))

View File

@ -60,6 +60,45 @@ If you want RAPIDS to process Fitbit sensor data using this stream, you will nee
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
??? info "FITBIT_HEARTRATE_INTRADAY"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
| LOCAL_DATE_TIME | FLAG_TO_MUTATE |
| DEVICE_ID | device_id |
| HEARTRATE | FLAG_TO_MUTATE |
| HEARTRATE_ZONE | FLAG_TO_MUTATE |
**MUTATION**
- **COLUMN_MAPPINGS**
| Script column | Stream column |
|-----------------|-----------------|
| JSON_FITBIT_COLUMN | fitbit_data |
- **SCRIPTS**
```bash
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py
```
!!! note
All columns except `DEVICE_ID` are parsed from `JSON_FITBIT_COLUMN`. `JSON_FITBIT_COLUMN` is a string column containing the JSON objects returned by Fitbit's API. See an example of the raw data RAPIDS expects for this data stream:
??? "Example of the raw data RAPIDS expects for this data stream"
|device_id |fitbit_data |
|---------------------------------------- |--------------------------------------------------------- |
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-07","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1200.6102,"max":88,"min":31,"minutes":1058,"name":"Out of Range"},{"caloriesOut":760.3020,"max":120,"min":86,"minutes":366,"name":"Fat Burn"},{"caloriesOut":15.2048,"max":146,"min":120,"minutes":2,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":72}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":68},{"time":"00:01:00","value":67},{"time":"00:02:00","value":67},...],"datasetInterval":1,"datasetType":"minute"}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
??? info "FITBIT_STEPS_SUMMARY"
**RAPIDS_COLUMN_MAPPINGS**

View File

@ -15,6 +15,16 @@ This is a description of the format RAPIDS needs to process data for the followi
| HEARTRATE_DAILY_CALORIESCARDIO | Calories spent while heartrate was inside the cardio [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) |
| HEARTRATE_DAILY_CALORIESPEAK | Calories spent while heartrate was inside the peak [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) |
??? info "FITBIT_HEARTRATE_INTRADAY"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged |
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` |
| DEVICE_ID | A string that uniquely identifies a device |
| HEARTRATE | Intraday heartrate |
| HEARTRATE_ZONE | Heartrate [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) that HEARTRATE belongs to. It is based on the heartrate zone ranges of each device |
??? info "FITBIT_STEPS_SUMMARY"
| RAPIDS column | Description |

View File

@ -558,7 +558,7 @@ rule fitbit_heartrate_summary_r_features:
rule fitbit_heartrate_intraday_python_features:
input:
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv",
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
@ -571,7 +571,7 @@ rule fitbit_heartrate_intraday_python_features:
rule fitbit_heartrate_intraday_r_features:
input:
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv",
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],

View File

@ -14,6 +14,20 @@ FITBIT_HEARTRATE_SUMMARY:
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py
FITBIT_HEARTRATE_INTRADAY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE
DEVICE_ID: device_id
LOCAL_DATE_TIME: FLAG_TO_MUTATE
HEARTRATE: FLAG_TO_MUTATE
HEARTRATE_ZONE: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py
FITBIT_STEPS_SUMMARY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE

View File

@ -0,0 +1,83 @@
import json
import pandas as pd
from datetime import datetime
HR_INTRADAY_COLUMNS = ("device_id",
"heartrate",
"heartrate_zone",
"local_date_time",
"timestamp")
def parseHeartrateZones(heartrate_data):
# Get the range of heartrate zones: outofrange, fatburn, cardio, peak
# refer to: https://help.fitbit.com/articles/en_US/Help_article/1565
heartrate_fitbit_data = heartrate_data["activities-heart"][0]
# API Version X: not sure the exact version
if "heartRateZones" in heartrate_fitbit_data:
heartrate_zones = heartrate_fitbit_data["heartRateZones"]
# API VERSION Y: not sure the exact version
elif "value" in heartrate_fitbit_data:
heartrate_zones = heartrate_fitbit_data["value"]["heartRateZones"]
else:
raise ValueError("Heartrate zone are stored in an unknown format, this could mean Fitbit's heartrate API changed")
heartrate_zones_range = {}
for hrzone in heartrate_zones:
heartrate_zones_range[hrzone["name"].lower().replace(" ", "")] = [hrzone["min"], hrzone["max"]]
return heartrate_zones_range
def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range):
for data in dataset:
d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
d_datetime = datetime.combine(curr_date, d_time)
d_hr = data["value"]
# Get heartrate zone by range: min <= heartrate < max
d_hrzone = None
for hrzone, hrrange in heartrate_zones_range.items():
if d_hr >= hrrange[0] and d_hr < hrrange[1]:
d_hrzone = hrzone
break
row_intraday = (device_id,
d_hr, d_hrzone,
d_datetime,
0)
records_intraday.append(row_intraday)
return records_intraday
def parseHeartrateData(heartrate_data):
if heartrate_data.empty:
return pd.DataFrame(columns=HR_INTRADAY_COLUMNS)
device_id = heartrate_data["device_id"].iloc[0]
records_intraday = []
# Parse JSON into individual records
for record in heartrate_data.json_fitbit_column:
record = json.loads(record) # Parse text into JSON
if "activities-heart" in record:
heartrate_zones_range = parseHeartrateZones(record)
curr_date = datetime.strptime(record["activities-heart"][0]["dateTime"], "%Y-%m-%d")
if "activities-heart-intraday" in record:
dataset = record["activities-heart-intraday"]["dataset"]
records_intraday = parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range)
parsed_data = pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS)
return parsed_data
def main(json_raw, stream_parameters):
parsed_data = parseHeartrateData(json_raw)
parsed_data["timestamp"] = 0 # this column is added at readable_datetime.R because we neeed to take into account multiple timezones
parsed_data['local_date_time'] = parsed_data['local_date_time'].dt.strftime('%Y-%m-%d %H:%M:%S')
return(parsed_data)

View File

@ -1,8 +1,6 @@
import yaml, json, sys
import json
import pandas as pd
import numpy as np
from datetime import datetime, timezone
from math import trunc
from datetime import datetime
HR_SUMMARY_COLUMNS = ("device_id",

View File

@ -102,6 +102,13 @@ FITBIT_HEARTRATE_SUMMARY:
- HEARTRATE_DAILY_CALORIESCARDIO
- HEARTRATE_DAILY_CALORIESPEAK
FITBIT_HEARTRATE_INTRADAY:
- TIMESTAMP
- DEVICE_ID
- LOCAL_DATE_TIME
- HEARTRATE
- HEARTRATE_ZONE
FITBIT_STEPS_SUMMARY:
- TIMESTAMP
- DEVICE_ID