Add hr intraday to fitbitjson_mysql

pull/128/head
JulioV 2021-03-10 13:31:56 -05:00
parent 47f449555a
commit 47e1b33816
8 changed files with 158 additions and 8 deletions

View File

@ -235,8 +235,7 @@ for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys():
for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys(): for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
if config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_intraday.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_intraday.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))

View File

@ -60,6 +60,45 @@ If you want RAPIDS to process Fitbit sensor data using this stream, you will nee
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}} |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}} |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
??? info "FITBIT_HEARTRATE_INTRADAY"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
| TIMESTAMP | FLAG_TO_MUTATE |
| LOCAL_DATE_TIME | FLAG_TO_MUTATE |
| DEVICE_ID | device_id |
| HEARTRATE | FLAG_TO_MUTATE |
| HEARTRATE_ZONE | FLAG_TO_MUTATE |
**MUTATION**
- **COLUMN_MAPPINGS**
| Script column | Stream column |
|-----------------|-----------------|
| JSON_FITBIT_COLUMN | fitbit_data |
- **SCRIPTS**
```bash
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py
```
!!! note
All columns except `DEVICE_ID` are parsed from `JSON_FITBIT_COLUMN`. `JSON_FITBIT_COLUMN` is a string column containing the JSON objects returned by Fitbit's API. See an example of the raw data RAPIDS expects for this data stream:
??? "Example of the raw data RAPIDS expects for this data stream"
|device_id |fitbit_data |
|---------------------------------------- |--------------------------------------------------------- |
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-07","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1200.6102,"max":88,"min":31,"minutes":1058,"name":"Out of Range"},{"caloriesOut":760.3020,"max":120,"min":86,"minutes":366,"name":"Fat Burn"},{"caloriesOut":15.2048,"max":146,"min":120,"minutes":2,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":72}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":68},{"time":"00:01:00","value":67},{"time":"00:02:00","value":67},...],"datasetInterval":1,"datasetType":"minute"}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
??? info "FITBIT_STEPS_SUMMARY" ??? info "FITBIT_STEPS_SUMMARY"
**RAPIDS_COLUMN_MAPPINGS** **RAPIDS_COLUMN_MAPPINGS**

View File

@ -15,6 +15,16 @@ This is a description of the format RAPIDS needs to process data for the followi
| HEARTRATE_DAILY_CALORIESCARDIO | Calories spent while heartrate was inside the cardio [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) | | HEARTRATE_DAILY_CALORIESCARDIO | Calories spent while heartrate was inside the cardio [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) |
| HEARTRATE_DAILY_CALORIESPEAK | Calories spent while heartrate was inside the peak [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) | | HEARTRATE_DAILY_CALORIESPEAK | Calories spent while heartrate was inside the peak [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) |
??? info "FITBIT_HEARTRATE_INTRADAY"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged |
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` |
| DEVICE_ID | A string that uniquely identifies a device |
| HEARTRATE | Intraday heartrate |
| HEARTRATE_ZONE | Heartrate [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) that HEARTRATE belongs to. It is based on the heartrate zone ranges of each device |
??? info "FITBIT_STEPS_SUMMARY" ??? info "FITBIT_STEPS_SUMMARY"
| RAPIDS column | Description | | RAPIDS column | Description |

View File

@ -558,7 +558,7 @@ rule fitbit_heartrate_summary_r_features:
rule fitbit_heartrate_intraday_python_features: rule fitbit_heartrate_intraday_python_features:
input: input:
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params: params:
provider = lambda wildcards: config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()], provider = lambda wildcards: config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
@ -571,7 +571,7 @@ rule fitbit_heartrate_intraday_python_features:
rule fitbit_heartrate_intraday_r_features: rule fitbit_heartrate_intraday_r_features:
input: input:
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params: params:
provider = lambda wildcards: config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()], provider = lambda wildcards: config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],

View File

@ -14,6 +14,20 @@ FITBIT_HEARTRATE_SUMMARY:
SCRIPTS: # List any python or r scripts that mutate your raw data SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py - src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py
FITBIT_HEARTRATE_INTRADAY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE
DEVICE_ID: device_id
LOCAL_DATE_TIME: FLAG_TO_MUTATE
HEARTRATE: FLAG_TO_MUTATE
HEARTRATE_ZONE: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py
FITBIT_STEPS_SUMMARY: FITBIT_STEPS_SUMMARY:
RAPIDS_COLUMN_MAPPINGS: RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE TIMESTAMP: FLAG_TO_MUTATE

View File

@ -0,0 +1,83 @@
import json
import pandas as pd
from datetime import datetime
# Column names, in order, of the intraday heartrate table built by this module.
# The order matches the tuples appended in parseHeartrateIntradayData.
HR_INTRADAY_COLUMNS = (
    "device_id",
    "heartrate",
    "heartrate_zone",
    "local_date_time",
    "timestamp",
)
def parseHeartrateZones(heartrate_data):
    """Return the [min, max] range of every heartrate zone in a Fitbit record.

    Only the first entry of ``heartrate_data["activities-heart"]`` is read.
    The zone list is looked up either directly on that entry or under its
    ``"value"`` key (two layouts of Fitbit's heartrate payload; the exact API
    versions are not known).

    Zone reference: https://help.fitbit.com/articles/en_US/Help_article/1565

    Args:
        heartrate_data: Parsed JSON object (dict) with an "activities-heart" list.

    Returns:
        dict mapping the zone name, lower-cased and with spaces removed
        (e.g. "Out of Range" -> "outofrange"), to ``[min, max]``.

    Raises:
        ValueError: If the entry has neither "heartRateZones" nor "value".
    """
    daily_summary = heartrate_data["activities-heart"][0]

    if "heartRateZones" in daily_summary:
        # Layout with the zones stored directly on the summary entry.
        zones = daily_summary["heartRateZones"]
    else:
        if "value" not in daily_summary:
            raise ValueError("Heartrate zone are stored in an unknown format, this could mean Fitbit's heartrate API changed")
        # Layout with the zones nested under "value".
        zones = daily_summary["value"]["heartRateZones"]

    zone_ranges = {}
    for zone in zones:
        bounds = [zone["min"], zone["max"]]
        zone_key = zone["name"].lower().replace(" ", "")
        zone_ranges[zone_key] = bounds
    return zone_ranges
def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range):
    """Append one row per intraday heartrate sample to ``records_intraday``.

    Args:
        records_intraday: List that receives the new rows (mutated in place).
        dataset: Iterable of dicts with a "time" (``HH:MM:SS``) and a "value"
            (heartrate) entry.
        device_id: Value stored in the first field of every row.
        curr_date: Date the samples belong to; combined with each sample time.
        heartrate_zones_range: dict mapping zone name to ``[min, max]``.

    Returns:
        The same ``records_intraday`` list, extended with tuples of
        ``(device_id, heartrate, zone, datetime, 0)``. The zone is the first
        one, in dict order, with ``min <= heartrate < max``, or ``None`` when
        no zone matches. The trailing 0 is a placeholder timestamp.
    """
    for sample in dataset:
        clock_time = datetime.strptime(sample["time"], '%H:%M:%S').time()
        sample_datetime = datetime.combine(curr_date, clock_time)
        heartrate = sample["value"]

        # First zone whose half-open range [min, max) contains the heartrate.
        zone = next(
            (
                name
                for name, bounds in heartrate_zones_range.items()
                if heartrate >= bounds[0] and heartrate < bounds[1]
            ),
            None,
        )

        records_intraday.append((device_id, heartrate, zone, sample_datetime, 0))

    return records_intraday
def parseHeartrateData(heartrate_data):
    """Flatten the JSON heartrate records of one device into per-sample rows.

    Args:
        heartrate_data: pandas DataFrame with a ``device_id`` column and a
            ``json_fitbit_column`` column holding one JSON document (as text)
            per row.

    Returns:
        A DataFrame with the columns listed in ``HR_INTRADAY_COLUMNS``. It has
        no rows when ``heartrate_data`` is empty or when no record carries
        intraday samples.

    Raises:
        ValueError: Propagated from ``parseHeartrateZones`` when the zones of
            a record are stored in an unknown layout.
    """
    if heartrate_data.empty:
        return pd.DataFrame(columns=HR_INTRADAY_COLUMNS)

    # Every row is attributed to the device id found in the first input row.
    device_id = heartrate_data["device_id"].iloc[0]
    records_intraday = []

    for raw_record in heartrate_data.json_fitbit_column:
        record = json.loads(raw_record)  # Parse text into JSON

        # The date and the zone ranges of the intraday samples come from the
        # "activities-heart" section. When it is missing or empty there is
        # nothing to date the samples with, so the record is skipped instead
        # of silently reusing the date/zones of the previous record.
        if "activities-heart" not in record or not record["activities-heart"]:
            continue

        heartrate_zones_range = parseHeartrateZones(record)
        curr_date = datetime.strptime(record["activities-heart"][0]["dateTime"], "%Y-%m-%d")

        if "activities-heart-intraday" in record:
            dataset = record["activities-heart-intraday"]["dataset"]
            records_intraday = parseHeartrateIntradayData(
                records_intraday, dataset, device_id, curr_date, heartrate_zones_range
            )

    return pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS)
def main(json_raw, stream_parameters):
    """Entry point of the mutation script: parse raw Fitbit JSON rows.

    Args:
        json_raw: DataFrame passed to ``parseHeartrateData``.
        stream_parameters: Not used by this script.

    Returns:
        The parsed DataFrame with ``timestamp`` set to 0 and, when there are
        parsed rows, ``local_date_time`` formatted as ``%Y-%m-%d %H:%M:%S``
        strings.
    """
    parsed_data = parseHeartrateData(json_raw)

    # This column is added at readable_datetime.R because we need to take
    # into account multiple timezones.
    parsed_data["timestamp"] = 0

    # The .dt accessor only exists on datetime-like columns. When no sample
    # was parsed the frame is empty and the column has object dtype, so the
    # formatting step is skipped instead of raising AttributeError.
    if pd.api.types.is_datetime64_any_dtype(parsed_data["local_date_time"]):
        parsed_data["local_date_time"] = parsed_data["local_date_time"].dt.strftime("%Y-%m-%d %H:%M:%S")

    return parsed_data

View File

@ -1,8 +1,6 @@
import yaml, json, sys import json
import pandas as pd import pandas as pd
import numpy as np from datetime import datetime
from datetime import datetime, timezone
from math import trunc
HR_SUMMARY_COLUMNS = ("device_id", HR_SUMMARY_COLUMNS = ("device_id",

View File

@ -102,6 +102,13 @@ FITBIT_HEARTRATE_SUMMARY:
- HEARTRATE_DAILY_CALORIESCARDIO - HEARTRATE_DAILY_CALORIESCARDIO
- HEARTRATE_DAILY_CALORIESPEAK - HEARTRATE_DAILY_CALORIESPEAK
FITBIT_HEARTRATE_INTRADAY:
- TIMESTAMP
- DEVICE_ID
- LOCAL_DATE_TIME
- HEARTRATE
- HEARTRATE_ZONE
FITBIT_STEPS_SUMMARY: FITBIT_STEPS_SUMMARY:
- TIMESTAMP - TIMESTAMP
- DEVICE_ID - DEVICE_ID