Add sleep summary to fitbitjson_mysql
parent
cf0afeb08d
commit
a420f5ef92
|
@ -244,8 +244,7 @@ for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
|
||||||
for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
|
for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
|
||||||
if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
|
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||||
|
|
|
@ -340,6 +340,7 @@ FITBIT_DATA_STREAMS:
|
||||||
# AVAILABLE:
|
# AVAILABLE:
|
||||||
fitbitjson_mysql:
|
fitbitjson_mysql:
|
||||||
DATABASE_GROUP: MY_GROUP
|
DATABASE_GROUP: MY_GROUP
|
||||||
|
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
||||||
|
|
||||||
# Sensors ------
|
# Sensors ------
|
||||||
|
|
||||||
|
@ -378,7 +379,6 @@ FITBIT_HEARTRATE_INTRADAY:
|
||||||
# See https://www.rapids.science/latest/features/fitbit-sleep-summary/
|
# See https://www.rapids.science/latest/features/fitbit-sleep-summary/
|
||||||
FITBIT_SLEEP_SUMMARY:
|
FITBIT_SLEEP_SUMMARY:
|
||||||
TABLE: sleep_summary
|
TABLE: sleep_summary
|
||||||
SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
|
|
@ -98,6 +98,53 @@ If you want RAPIDS to process Fitbit sensor data using this stream, you will nee
|
||||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
|
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
|
||||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
|
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
|
||||||
|
|
||||||
|
??? info "FITBIT_SLEEP_SUMMARY"
|
||||||
|
|
||||||
|
**RAPIDS_COLUMN_MAPPINGS**
|
||||||
|
|
||||||
|
| RAPIDS column | Stream column |
|
||||||
|
|-----------------|-----------------|
|
||||||
|
| TIMESTAMP | FLAG_TO_MUTATE |
|
||||||
|
| LOCAL_DATE_TIME | FLAG_TO_MUTATE |
|
||||||
|
| LOCAL_START_DATE_TIME | FLAG_TO_MUTATE |
|
||||||
|
| LOCAL_END_DATE_TIME | FLAG_TO_MUTATE |
|
||||||
|
| DEVICE_ID | device_id |
|
||||||
|
| EFFICIENCY | FLAG_TO_MUTATE |
|
||||||
|
| MINUTES_AFTER_WAKEUP | FLAG_TO_MUTATE |
|
||||||
|
| MINUTES_ASLEEP | FLAG_TO_MUTATE |
|
||||||
|
| MINUTES_AWAKE | FLAG_TO_MUTATE |
|
||||||
|
| MINUTES_TO_FALL_ASLEEP | FLAG_TO_MUTATE |
|
||||||
|
| MINUTES_IN_BED | FLAG_TO_MUTATE |
|
||||||
|
| IS_MAIN_SLEEP | FLAG_TO_MUTATE |
|
||||||
|
| TYPE | FLAG_TO_MUTATE |
|
||||||
|
|
||||||
|
**MUTATION**
|
||||||
|
|
||||||
|
- **COLUMN_MAPPINGS**
|
||||||
|
|
||||||
|
| Script column | Stream column |
|
||||||
|
|-----------------|-----------------|
|
||||||
|
| JSON_FITBIT_COLUMN | fitbit_data |
|
||||||
|
|
||||||
|
- **SCRIPTS**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
src/data/streams/mutations/fitbit/parse_sleep_summary_json.py
|
||||||
|
```
|
||||||
|
|
||||||
|
!!! note
|
||||||
|
|
||||||
|
Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1's `count_awake`, `duration_awake`, `count_awakenings`, `count_restless`, and `duration_restless` columns.
|
||||||
|
|
||||||
|
All columns except `DEVICE_ID` are parsed from `JSON_FITBIT_COLUMN`. `JSON_FITBIT_COLUMN` is a string column containing the JSON objects returned by Fitbit's API. See an example of the raw data RAPIDS expects for this data stream:
|
||||||
|
|
||||||
|
??? example "Example of the expected raw data"
|
||||||
|
|
||||||
|
|device_id |fitbit_data |
|
||||||
|
|---------------------------------------- |--------------------------------------------------------- |
|
||||||
|
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}}
|
||||||
|
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}}
|
||||||
|
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}}
|
||||||
|
|
||||||
??? info "FITBIT_STEPS_SUMMARY"
|
??? info "FITBIT_STEPS_SUMMARY"
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,24 @@ This is a description of the format RAPIDS needs to process data for the followi
|
||||||
| HEARTRATE | Intraday heartrate |
|
| HEARTRATE | Intraday heartrate |
|
||||||
| HEARTRATE_ZONE | Heartrate [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) that HEARTRATE belongs to. It is based on the heartrate zone ranges of each device |
|
| HEARTRATE_ZONE | Heartrate [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) that HEARTRATE belongs to. It is based on the heartrate zone ranges of each device |
|
||||||
|
|
||||||
|
??? info "FITBIT_SLEEP_SUMMARY"
|
||||||
|
|
||||||
|
| RAPIDS column | Description |
|
||||||
|
|-----------------|-----------------|
|
||||||
|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged |
|
||||||
|
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss`. This is a copy of either LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME, depending on which column is used to assign an episode to a specific day|
|
||||||
|
| LOCAL_START_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the start of a daily sleep episode |
|
||||||
|
| LOCAL_END_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the end of a daily sleep episode|
|
||||||
|
| DEVICE_ID | A string that uniquely identifies a device |
|
||||||
|
| EFFICIENCY | Sleep efficiency computed by Fitbit as time asleep / (total time in bed - time to fall asleep)|
|
||||||
|
| MINUTES_AFTER_WAKEUP | Minutes the participant spent in bed after waking up|
|
||||||
|
| MINUTES_ASLEEP | Minutes the participant was asleep |
|
||||||
|
| MINUTES_AWAKE | Minutes the participant was awake |
|
||||||
|
| MINUTES_TO_FALL_ASLEEP | Minutes the participant spent in bed before falling asleep|
|
||||||
|
| MINUTES_IN_BED | Minutes the participant spent in bed across the sleep episode|
|
||||||
|
| IS_MAIN_SLEEP | 0 if this episode is a nap, or 1 if it is a main sleep episode|
|
||||||
|
| TYPE | stages or classic [sleep data](https://dev.fitbit.com/build/reference/web-api/sleep/)|
|
||||||
|
|
||||||
??? info "FITBIT_STEPS_SUMMARY"
|
??? info "FITBIT_STEPS_SUMMARY"
|
||||||
|
|
||||||
| RAPIDS column | Description |
|
| RAPIDS column | Description |
|
||||||
|
|
|
@ -636,7 +636,7 @@ rule fitbit_steps_intraday_r_features:
|
||||||
|
|
||||||
rule fitbit_sleep_summary_python_features:
|
rule fitbit_sleep_summary_python_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
|
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
||||||
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
@ -649,7 +649,7 @@ rule fitbit_sleep_summary_python_features:
|
||||||
|
|
||||||
rule fitbit_sleep_summary_r_features:
|
rule fitbit_sleep_summary_r_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
|
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
||||||
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
|
|
@ -1,161 +0,0 @@
|
||||||
import yaml, json, sys
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from math import trunc
|
|
||||||
|
|
||||||
|
|
||||||
# Column names of the parsed daily heartrate summary table. The order matches
# the tuples built by parseHeartrateSummaryData.
HR_SUMMARY_COLUMNS = ("device_id",
                        "local_date_time",
                        "timestamp",
                        "heartrate_daily_restinghr",
                        "heartrate_daily_caloriesoutofrange",
                        "heartrate_daily_caloriesfatburn",
                        "heartrate_daily_caloriescardio",
                        "heartrate_daily_caloriespeak")

# Column names of the parsed intraday heartrate table. The order matches the
# tuples built by parseHeartrateIntradayData.
HR_INTRADAY_COLUMNS = ("device_id",
                        "heartrate",
                        "heartrate_zone",
                        "local_date_time",
                        "timestamp")
|
|
||||||
|
|
||||||
def parseHeartrateZones(heartrate_data):
    """Return the heartrate zone ranges found in the first raw record.

    Only the first row of ``heartrate_data["fitbit_data"]`` is inspected. The
    zones (out of range, fat burn, cardio, peak) are described at
    https://help.fitbit.com/articles/en_US/Help_article/1565

    Args:
        heartrate_data: DataFrame with a ``fitbit_data`` column of JSON strings.

    Returns:
        dict mapping the lower-cased, space-free zone name to ``[min, max]``.

    Raises:
        ValueError: if the record has neither of the two known layouts.
    """
    first_payload = json.loads(heartrate_data["fitbit_data"].iloc[0])
    day_summary = first_payload["activities-heart"][0]

    # Two layouts have been observed (exact API versions unknown): zones stored
    # directly on the day entry, or nested under its "value" key.
    if "heartRateZones" in day_summary:
        zones = day_summary["heartRateZones"]
    elif "value" in day_summary:
        zones = day_summary["value"]["heartRateZones"]
    else:
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    return {
        zone["name"].lower().replace(" ", ""): [zone["min"], zone["max"]]
        for zone in zones
    }
|
|
||||||
|
|
||||||
def parseHeartrateSummaryData(record_summary, device_id, curr_date):
    """Build one daily heartrate summary row from a Fitbit "activities-heart" entry.

    Args:
        record_summary: dict for one day, in either of the two known layouts
            (zones stored directly on the entry, or nested under "value").
        device_id: identifier copied into the row.
        curr_date: date of the record, copied into the row as local_date_time.

    Returns:
        tuple ordered as: device_id, local_date_time, timestamp (always 0
        here), resting heartrate, then calories for the out-of-range, fat-burn,
        cardio and peak zones. Calories are None when the first zone has no
        "caloriesOut" key.

    Raises:
        ValueError: if the entry matches neither known layout.
    """
    # API version X: not sure of the exact version
    if "heartRateZones" in record_summary:
        heartrate_zones = record_summary["heartRateZones"]
        d_resting_heartrate = record_summary.get("value")
    # API version Y: not sure of the exact version
    elif "value" in record_summary:
        heartrate_zones = record_summary["value"]["heartRateZones"]
        d_resting_heartrate = record_summary["value"].get("restingHeartRate")
    else:
        # The original built this exception without raising it, so execution
        # continued and failed later on the unbound `heartrate_zones`.
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    # Zones are read positionally: out of range, fat burn, cardio, peak.
    if "caloriesOut" in heartrate_zones[0]:
        d_calories_outofrange = heartrate_zones[0]["caloriesOut"]
        d_calories_fatburn = heartrate_zones[1]["caloriesOut"]
        d_calories_cardio = heartrate_zones[2]["caloriesOut"]
        d_calories_peak = heartrate_zones[3]["caloriesOut"]
    else:
        d_calories_outofrange, d_calories_fatburn, d_calories_cardio, d_calories_peak = None, None, None, None

    row_summary = (device_id,
                    curr_date,
                    0,
                    d_resting_heartrate,
                    d_calories_outofrange,
                    d_calories_fatburn,
                    d_calories_cardio,
                    d_calories_peak)
    return row_summary
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range):
    """Append one intraday row per sample in ``dataset`` to ``records_intraday``.

    Args:
        records_intraday: list that is extended in place and also returned.
        dataset: iterable of dicts with "time" (``HH:MM:SS``) and "value" keys.
        device_id: identifier copied into every row.
        curr_date: date combined with each sample's time of day.
        heartrate_zones_range: dict of zone name -> ``[min, max]``.

    Returns:
        ``records_intraday`` with tuples ordered as: device_id, heartrate,
        heartrate zone, local date time, timestamp (always 0 here).
    """
    for sample in dataset:
        time_of_day = datetime.strptime(sample["time"], '%H:%M:%S').time()
        sample_datetime = datetime.combine(curr_date, time_of_day)
        heartrate = sample["value"]

        # First zone whose range satisfies min <= heartrate < max; None if no zone matches.
        zone_name = next(
            (name for name, bounds in heartrate_zones_range.items()
             if heartrate >= bounds[0] and heartrate < bounds[1]),
            None,
        )

        records_intraday.append((device_id, heartrate, zone_name, sample_datetime, 0))
    return records_intraday
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parseHeartrateData(heartrate_data, fitbit_data_type):
    """Parse raw Fitbit heartrate JSON rows into a summary or intraday table.

    Args:
        heartrate_data: DataFrame with ``device_id`` and ``fitbit_data`` (JSON
            string) columns.
        fitbit_data_type: "summary" or "intraday".

    Returns:
        DataFrame with HR_SUMMARY_COLUMNS or HR_INTRADAY_COLUMNS, depending on
        ``fitbit_data_type``. Empty input yields an empty table with those columns.
    """
    wants_summary = fitbit_data_type == "summary"
    wants_intraday = fitbit_data_type == "intraday"

    if heartrate_data.empty:
        if wants_summary:
            return pd.DataFrame(columns=HR_SUMMARY_COLUMNS)
        if wants_intraday:
            return pd.DataFrame(columns=HR_INTRADAY_COLUMNS)

    # The device id and the zone ranges are taken from the first row only.
    device_id = heartrate_data["device_id"].iloc[0]
    summary_rows, intraday_rows = [], []
    zone_ranges = parseHeartrateZones(heartrate_data)

    for raw_json in heartrate_data.fitbit_data:
        payload = json.loads(raw_json)
        day_entry = payload["activities-heart"][0]
        curr_date = datetime.strptime(day_entry["dateTime"], "%Y-%m-%d")

        if wants_summary:
            summary_rows.append(parseHeartrateSummaryData(day_entry, device_id, curr_date))

        if wants_intraday:
            samples = payload["activities-heart-intraday"]["dataset"]
            intraday_rows = parseHeartrateIntradayData(intraday_rows, samples, device_id, curr_date, zone_ranges)

    if wants_summary:
        parsed_data = pd.DataFrame(data=summary_rows, columns=HR_SUMMARY_COLUMNS)
    elif wants_intraday:
        parsed_data = pd.DataFrame(data=intraday_rows, columns=HR_INTRADAY_COLUMNS)
    return parsed_data
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Parameters supplied by the Snakemake rule that runs this script.
# NOTE(review): this rebinds the name `timezone` imported from `datetime` at the top of the file.
timezone = snakemake.params["timezone"]
column_format = snakemake.params["column_format"]
fitbit_data_type = snakemake.params["fitbit_data_type"]

# Read the participant's Fitbit date range. One day is added to END_DATE so the
# range filter below can use an exclusive upper bound.
with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
local_start_date = pd.Timestamp(participant_file["FITBIT"]["START_DATE"])
local_end_date = pd.Timestamp(participant_file["FITBIT"]["END_DATE"]) + pd.DateOffset(1)

# JSON input is parsed record by record; PLAIN_TEXT input is read as an already
# parsed table whose local_date_time column is made timezone-naive.
if column_format == "JSON":
    json_raw = pd.read_csv(snakemake.input["raw_data"])
    parsed_data = parseHeartrateData(json_raw, fitbit_data_type)
elif column_format == "PLAIN_TEXT":
    parsed_data = pd.read_csv(snakemake.input["raw_data"], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
else:
    raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].")

# discard rows with restinghr = 0 (compared both as the string "0" and the number 0)
if fitbit_data_type == "summary":
    parsed_data = parsed_data[(parsed_data["heartrate_daily_restinghr"] != "0") & (parsed_data["heartrate_daily_restinghr"] != 0)]

# Only keep dates in the range of [local_start_date, local_end_date)
if not pd.isnull(local_start_date) and not pd.isnull(local_end_date):
    parsed_data = parsed_data.loc[(parsed_data["local_date_time"] >= local_start_date) & (parsed_data["local_date_time"] < local_end_date)]

if parsed_data.shape[0] > 0:
    # Localize to the configured timezone; nonexistent local times become NaT
    # and are dropped before the integer conversion. The result is floor-divided
    # by 10**6 (presumably nanoseconds -> milliseconds; confirm). Rows dropped
    # here get no timestamp on assignment and are removed by the dropna below.
    parsed_data["timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6
    parsed_data.dropna(subset=['timestamp'], inplace=True)

parsed_data.to_csv(snakemake.output[0], index=False)
|
|
|
@ -27,6 +27,26 @@ FITBIT_HEARTRATE_INTRADAY:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
- src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py
|
- src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py
|
||||||
|
|
||||||
|
FITBIT_SLEEP_SUMMARY:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: FLAG_TO_MUTATE
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
LOCAL_DATE_TIME: FLAG_TO_MUTATE
|
||||||
|
LOCAL_START_DATE_TIME: FLAG_TO_MUTATE
|
||||||
|
LOCAL_END_DATE_TIME: FLAG_TO_MUTATE
|
||||||
|
EFFICIENCY: FLAG_TO_MUTATE
|
||||||
|
MINUTES_AFTER_WAKEUP: FLAG_TO_MUTATE
|
||||||
|
MINUTES_ASLEEP: FLAG_TO_MUTATE
|
||||||
|
MINUTES_AWAKE: FLAG_TO_MUTATE
|
||||||
|
MINUTES_TO_FALL_ASLEEP: FLAG_TO_MUTATE
|
||||||
|
MINUTES_IN_BED: FLAG_TO_MUTATE
|
||||||
|
IS_MAIN_SLEEP: FLAG_TO_MUTATE
|
||||||
|
TYPE: FLAG_TO_MUTATE
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
- src/data/streams/mutations/fitbit/parse_sleep_summary_json.py
|
||||||
|
|
||||||
FITBIT_STEPS_SUMMARY:
|
FITBIT_STEPS_SUMMARY:
|
||||||
RAPIDS_COLUMN_MAPPINGS:
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
import json, yaml
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import dateutil.parser
|
||||||
|
|
||||||
|
# Column names of the parsed sleep summary table. The order matches the tuples
# built by parseOneSleepRecord. `local_date_time` is not listed here; main()
# adds it afterwards.
SLEEP_SUMMARY_COLUMNS = ("device_id", "efficiency",
                        "minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
                        "is_main_sleep", "type",
                        "local_start_date_time", "local_end_date_time",
                        "timestamp")
|
||||||
|
|
||||||
|
|
||||||
|
# Parse one record for sleep API version 1.2
|
||||||
|
def parseOneSleepRecord(record, device_id, d_is_main_sleep, records_summary, episode_type):
    """Append the summary row of one Fitbit sleep episode to ``records_summary``.

    Args:
        record: dict for one sleep episode. Must contain "startTime",
            "endTime", "efficiency", "minutesAfterWakeup", "minutesAsleep",
            "minutesAwake", "minutesToFallAsleep" and "timeInBed".
        device_id: identifier copied into the row.
        d_is_main_sleep: 1 for a main sleep episode, 0 for a nap.
        records_summary: list that is extended in place and also returned.
        episode_type: value stored in the "type" column (e.g. "classic", "stages").

    Returns:
        ``records_summary`` with one tuple appended, ordered like
        SLEEP_SUMMARY_COLUMNS. The timestamp field is a placeholder 0.
    """
    sleep_record_type = episode_type

    # "YYYY-MM-DDTHH:MM:SS" is 19 characters long. Slicing at 19 drops the
    # fractional-seconds suffix (e.g. ".000") and keeps both digits of the
    # seconds field. The original slice at 18 turned ":30" into ":3".
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")

    # Summary data
    row_summary = (device_id, record["efficiency"],
                    record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
                    d_is_main_sleep, sleep_record_type,
                    d_start_datetime, d_end_datetime,
                    0)

    records_summary.append(row_summary)

    return records_summary
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def parseSleepData(sleep_data):
    """Parse raw Fitbit sleep JSON rows into one summary row per sleep episode.

    Args:
        sleep_data: DataFrame with ``device_id`` and ``json_fitbit_column``
            (JSON string) columns.

    Returns:
        DataFrame with SLEEP_SUMMARY_COLUMNS; empty if ``sleep_data`` is empty.
    """
    if sleep_data.empty:
        return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS)

    # The device id is taken from the first row only.
    device_id = sleep_data["device_id"].iloc[0]
    episode_rows = []

    for raw_json in sleep_data.json_fitbit_column:
        payload = json.loads(raw_json)
        if "sleep" not in payload:
            continue

        for episode in payload["sleep"]:
            # Whether the sleep episode is nap (0) or main sleep (1)
            main_sleep_flag = 1 if episode["isMainSleep"] else 0
            # Sleep API version 1 records carry "awakeCount" and are labelled
            # "classic"; version 1.2 records provide their own "type".
            episode_type = "classic" if "awakeCount" in episode else episode['type']
            episode_rows = parseOneSleepRecord(episode, device_id, main_sleep_flag, episode_rows, episode_type)

    return pd.DataFrame(data=episode_rows, columns=SLEEP_SUMMARY_COLUMNS)
|
||||||
|
|
||||||
|
def main(json_raw, stream_parameters):
    """Mutate raw Fitbit sleep JSON into the RAPIDS sleep summary format.

    Args:
        json_raw: DataFrame with ``device_id`` and ``json_fitbit_column`` columns.
        stream_parameters: dict of data stream parameters. The key
            ``SLEEP_SUMMARY_EPISODE_DAY_ANCHOR`` selects which end of an
            episode becomes ``local_date_time``: "start" uses the episode
            start, any other value uses the episode end.

    Returns:
        DataFrame with the parsed sleep summary columns plus
        ``local_date_time``. Date-time columns are strings formatted
        ``%Y-%m-%d %H:%M:%S``.
    """
    parsed_data = parseSleepData(json_raw)
    parsed_data["timestamp"] = 0 # this column is added at readable_datetime.R because we need to take into account multiple timezones

    # When no episodes were parsed the date-time columns have object dtype and
    # the `.dt` accessor would raise AttributeError, so coerce to datetime
    # first. This is a no-op for columns that are already datetime64.
    for column in ('local_start_date_time', 'local_end_date_time'):
        parsed_data[column] = pd.to_datetime(parsed_data[column]).dt.strftime('%Y-%m-%d %H:%M:%S')

    if stream_parameters["SLEEP_SUMMARY_EPISODE_DAY_ANCHOR"] == "start":
        parsed_data["local_date_time"] = parsed_data['local_start_date_time']
    else:
        parsed_data["local_date_time"] = parsed_data['local_end_date_time']

    return(parsed_data)
|
|
@ -144,6 +144,21 @@ FITBIT_HEARTRATE_INTRADAY:
|
||||||
- HEARTRATE
|
- HEARTRATE
|
||||||
- HEARTRATE_ZONE
|
- HEARTRATE_ZONE
|
||||||
|
|
||||||
|
FITBIT_SLEEP_SUMMARY:
|
||||||
|
- TIMESTAMP
|
||||||
|
- DEVICE_ID
|
||||||
|
- LOCAL_DATE_TIME
|
||||||
|
- LOCAL_START_DATE_TIME
|
||||||
|
- LOCAL_END_DATE_TIME
|
||||||
|
- EFFICIENCY
|
||||||
|
- MINUTES_AFTER_WAKEUP
|
||||||
|
- MINUTES_ASLEEP
|
||||||
|
- MINUTES_AWAKE
|
||||||
|
- MINUTES_TO_FALL_ASLEEP
|
||||||
|
- MINUTES_IN_BED
|
||||||
|
- IS_MAIN_SLEEP
|
||||||
|
- TYPE
|
||||||
|
|
||||||
FITBIT_STEPS_SUMMARY:
|
FITBIT_STEPS_SUMMARY:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
|
|
|
@ -832,9 +832,13 @@ properties:
|
||||||
type: string
|
type: string
|
||||||
fitbitjson_mysql:
|
fitbitjson_mysql:
|
||||||
type: object
|
type: object
|
||||||
|
required: [DATABASE_GROUP, SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]
|
||||||
properties:
|
properties:
|
||||||
DATABASE_GROUP:
|
DATABASE_GROUP:
|
||||||
type: string
|
type: string
|
||||||
|
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR:
|
||||||
|
type: string
|
||||||
|
enum: ["start", "end"]
|
||||||
|
|
||||||
FITBIT_DATA_YIELD:
|
FITBIT_DATA_YIELD:
|
||||||
type: object
|
type: object
|
||||||
|
@ -903,13 +907,10 @@ properties:
|
||||||
|
|
||||||
FITBIT_SLEEP_SUMMARY:
|
FITBIT_SLEEP_SUMMARY:
|
||||||
type: object
|
type: object
|
||||||
required: [TABLE, SLEEP_EPISODE_TIMESTAMP, PROVIDERS]
|
required: [TABLE, PROVIDERS]
|
||||||
properties:
|
properties:
|
||||||
TABLE:
|
TABLE:
|
||||||
type: string
|
type: string
|
||||||
SLEEP_EPISODE_TIMESTAMP:
|
|
||||||
type: string
|
|
||||||
enum: ["start", "end"]
|
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
type: ["null", object]
|
type: ["null", object]
|
||||||
properties:
|
properties:
|
||||||
|
|
Loading…
Reference in New Issue