Add sleep summary to fitbitjson_mysql

pull/128/head
JulioV 2021-03-10 17:22:26 -05:00
parent cf0afeb08d
commit a420f5ef92
10 changed files with 179 additions and 170 deletions

View File

@ -244,8 +244,7 @@ for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys(): for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))

View File

@ -340,6 +340,7 @@ FITBIT_DATA_STREAMS:
# AVAILABLE: # AVAILABLE:
fitbitjson_mysql: fitbitjson_mysql:
DATABASE_GROUP: MY_GROUP DATABASE_GROUP: MY_GROUP
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
# Sensors ------ # Sensors ------
@ -378,7 +379,6 @@ FITBIT_HEARTRATE_INTRADAY:
# See https://www.rapids.science/latest/features/fitbit-sleep-summary/ # See https://www.rapids.science/latest/features/fitbit-sleep-summary/
FITBIT_SLEEP_SUMMARY: FITBIT_SLEEP_SUMMARY:
TABLE: sleep_summary TABLE: sleep_summary
SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False

View File

@ -98,6 +98,53 @@ If you want RAPIDS to process Fitbit sensor data using this stream, you will nee
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}} |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}} |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
??? info "FITBIT_SLEEP_SUMMARY"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
| TIMESTAMP | FLAG_TO_MUTATE |
| LOCAL_DATE_TIME | FLAG_TO_MUTATE |
| LOCAL_START_DATE_TIME | FLAG_TO_MUTATE |
| LOCAL_END_DATE_TIME | FLAG_TO_MUTATE |
| DEVICE_ID | device_id |
| EFFICIENCY | FLAG_TO_MUTATE |
| MINUTES_AFTER_WAKEUP | FLAG_TO_MUTATE |
| MINUTES_ASLEEP | FLAG_TO_MUTATE |
| MINUTES_AWAKE | FLAG_TO_MUTATE |
| MINUTES_TO_FALL_ASLEEP | FLAG_TO_MUTATE |
| MINUTES_IN_BED | FLAG_TO_MUTATE |
| IS_MAIN_SLEEP | FLAG_TO_MUTATE |
| TYPE | FLAG_TO_MUTATE |
**MUTATION**
- **COLUMN_MAPPINGS**
| Script column | Stream column |
|-----------------|-----------------|
| JSON_FITBIT_COLUMN | fitbit_data |
- **SCRIPTS**
```bash
src/data/streams/mutations/fitbit/parse_sleep_summary_json.py
```
!!! note
Fitbit's API has two versions for sleep data, v1 and v1.2. We support both but ignore v1's `count_awake`, `duration_awake`, `count_awakenings`, `count_restless`, and `duration_restless` columns.
All columns except `DEVICE_ID` are parsed from `JSON_FITBIT_COLUMN`. `JSON_FITBIT_COLUMN` is a string column containing the JSON objects returned by Fitbit's API. See an example of the raw data RAPIDS expects for this data stream:
??? example "Example of the expected raw data"
|device_id |fitbit_data |
|---------------------------------------- |--------------------------------------------------------- |
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}}
??? info "FITBIT_STEPS_SUMMARY" ??? info "FITBIT_STEPS_SUMMARY"

View File

@ -25,6 +25,24 @@ This is a description of the format RAPIDS needs to process data for the followi
| HEARTRATE | Intraday heartrate | | HEARTRATE | Intraday heartrate |
| HEARTRATE_ZONE | Heartrate [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) that HEARTRATE belongs to. It is based on the heartrate zone ranges of each device | | HEARTRATE_ZONE | Heartrate [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) that HEARTRATE belongs to. It is based on the heartrate zone ranges of each device |
??? info "FITBIT_SLEEP_SUMMARY"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged |
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss`; this is a copy of either LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME, depending on which column is used to assign an episode to a specific day |
| LOCAL_START_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the start of a daily sleep episode |
| LOCAL_END_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the end of a daily sleep episode|
| DEVICE_ID | A string that uniquely identifies a device |
| EFFICIENCY | Sleep efficiency computed by Fitbit as time asleep / (total time in bed - time to fall asleep) |
| MINUTES_AFTER_WAKEUP | Minutes the participant spent in bed after waking up|
| MINUTES_ASLEEP | Minutes the participant was asleep |
| MINUTES_AWAKE | Minutes the participant was awake |
| MINUTES_TO_FALL_ASLEEP | Minutes the participant spent in bed before falling asleep|
| MINUTES_IN_BED | Minutes the participant spent in bed across the sleep episode|
| IS_MAIN_SLEEP | 0 if this episode is a nap, or 1 if it is a main sleep episode|
| TYPE | stages or classic [sleep data](https://dev.fitbit.com/build/reference/web-api/sleep/)|
??? info "FITBIT_STEPS_SUMMARY" ??? info "FITBIT_STEPS_SUMMARY"
| RAPIDS column | Description | | RAPIDS column | Description |

View File

@ -636,7 +636,7 @@ rule fitbit_steps_intraday_r_features:
rule fitbit_sleep_summary_python_features: rule fitbit_sleep_summary_python_features:
input: input:
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", sensor_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params: params:
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()], provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
@ -649,7 +649,7 @@ rule fitbit_sleep_summary_python_features:
rule fitbit_sleep_summary_r_features: rule fitbit_sleep_summary_r_features:
input: input:
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", sensor_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params: params:
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()], provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],

View File

@ -1,161 +0,0 @@
import yaml, json, sys
import pandas as pd
import numpy as np
from datetime import datetime, timezone
from math import trunc
# Column names of the daily heart-rate summary table. parseHeartrateData passes
# this tuple as `columns=`, and parseHeartrateSummaryData builds its row tuple
# in exactly this order, so the two must be kept in sync.
HR_SUMMARY_COLUMNS = ("device_id",
                      "local_date_time",
                      "timestamp",
                      "heartrate_daily_restinghr",
                      "heartrate_daily_caloriesoutofrange",
                      "heartrate_daily_caloriesfatburn",
                      "heartrate_daily_caloriescardio",
                      "heartrate_daily_caloriespeak")
# Column names of the intraday heart-rate table. The order matches the row
# tuple appended by parseHeartrateIntradayData.
HR_INTRADAY_COLUMNS = ("device_id",
                       "heartrate",
                       "heartrate_zone",
                       "local_date_time",
                       "timestamp")
def parseHeartrateZones(heartrate_data):
    """Return the [min, max] heart-rate range of every zone in the first raw row.

    Only the first row of the "fitbit_data" column is inspected. Zone names are
    lower-cased and stripped of spaces to form the keys (e.g. "Fat Burn" becomes
    "fatburn"); see https://help.fitbit.com/articles/en_US/Help_article/1565

    Raises:
        ValueError: if the zones are found neither directly under the daily
            entry nor under its "value" key.
    """
    first_day = json.loads(heartrate_data["fitbit_data"].iloc[0])["activities-heart"][0]

    # Two payload layouts exist; the exact Fitbit API versions are not known.
    if "heartRateZones" in first_day:
        zones = first_day["heartRateZones"]
    elif "value" in first_day:
        zones = first_day["value"]["heartRateZones"]
    else:
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    return {zone["name"].lower().replace(" ", ""): [zone["min"], zone["max"]] for zone in zones}
def parseHeartrateSummaryData(record_summary, device_id, curr_date):
    """Build one daily heart-rate summary row from an "activities-heart" entry.

    Two payload layouts are handled (the exact Fitbit API versions are not
    known): zones stored directly under "heartRateZones", or nested under
    "value" together with "restingHeartRate".

    Args:
        record_summary: dict holding a single day's "activities-heart" entry.
        device_id: identifier copied into the first slot of the row.
        curr_date: value stored in the local_date_time slot of the row.

    Returns:
        A tuple ordered like HR_SUMMARY_COLUMNS. The timestamp slot is a 0
        placeholder, and missing resting heart rate or calories are None.

    Raises:
        ValueError: if the entry matches neither known layout.
    """
    if "heartRateZones" in record_summary:
        heartrate_zones = record_summary["heartRateZones"]
        d_resting_heartrate = record_summary.get("value")
    elif "value" in record_summary:
        daily_value = record_summary["value"]
        heartrate_zones = daily_value["heartRateZones"]
        d_resting_heartrate = daily_value.get("restingHeartRate")
    else:
        # The exception used to be constructed but never raised, so an unknown
        # layout surfaced later as an unrelated unbound-variable error.
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    if "caloriesOut" in heartrate_zones[0]:
        # Zones are read by position: out of range, fat burn, cardio, peak.
        d_calories_outofrange, d_calories_fatburn, d_calories_cardio, d_calories_peak = (
            heartrate_zones[position]["caloriesOut"] for position in range(4))
    else:
        d_calories_outofrange = d_calories_fatburn = d_calories_cardio = d_calories_peak = None

    return (device_id,
            curr_date,
            0,
            d_resting_heartrate,
            d_calories_outofrange,
            d_calories_fatburn,
            d_calories_cardio,
            d_calories_peak)
def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range):
    """Append one intraday row per sample in `dataset` and return the list.

    Each row is (device_id, heartrate, zone, local datetime, 0), where the
    trailing 0 is a timestamp placeholder. The zone is the first entry of
    `heartrate_zones_range` whose range satisfies min <= heartrate < max, or
    None when no range matches. `records_intraday` is extended in place.
    """
    for sample in dataset:
        clock_time = datetime.strptime(sample["time"], '%H:%M:%S').time()
        sample_datetime = datetime.combine(curr_date, clock_time)
        bpm = sample["value"]
        matched_zone = next(
            (zone_name
             for zone_name, bounds in heartrate_zones_range.items()
             if bounds[0] <= bpm < bounds[1]),
            None)
        records_intraday.append((device_id, bpm, matched_zone, sample_datetime, 0))
    return records_intraday
def parseHeartrateData(heartrate_data, fitbit_data_type):
    """Parse raw Fitbit heart-rate JSON rows into a summary or intraday table.

    `fitbit_data_type` selects the output: "summary" yields one row per raw
    record with HR_SUMMARY_COLUMNS, "intraday" yields one row per sample with
    HR_INTRADAY_COLUMNS. Empty input yields an empty frame with the matching
    columns. The device id and the zone ranges are both taken from the first
    row only. Any other `fitbit_data_type` is not handled: no frame is built
    and the final return fails.
    """
    wants_summary = fitbit_data_type == "summary"
    wants_intraday = fitbit_data_type == "intraday"

    if heartrate_data.empty:
        if wants_summary:
            return pd.DataFrame(columns=HR_SUMMARY_COLUMNS)
        if wants_intraday:
            return pd.DataFrame(columns=HR_INTRADAY_COLUMNS)

    device_id = heartrate_data["device_id"].iloc[0]
    summary_rows, intraday_rows = [], []
    zone_ranges = parseHeartrateZones(heartrate_data)

    # Every raw row is a JSON text document for one day.
    for raw_json in heartrate_data["fitbit_data"]:
        day = json.loads(raw_json)
        curr_date = datetime.strptime(day["activities-heart"][0]["dateTime"], "%Y-%m-%d")
        if wants_summary:
            summary_rows.append(
                parseHeartrateSummaryData(day["activities-heart"][0], device_id, curr_date))
        if wants_intraday:
            intraday_rows = parseHeartrateIntradayData(
                intraday_rows, day["activities-heart-intraday"]["dataset"],
                device_id, curr_date, zone_ranges)

    if wants_summary:
        parsed_data = pd.DataFrame(data=summary_rows, columns=HR_SUMMARY_COLUMNS)
    elif wants_intraday:
        parsed_data = pd.DataFrame(data=intraday_rows, columns=HR_INTRADAY_COLUMNS)
    return parsed_data
# Script entry: parse the raw heart-rate file named by snakemake and write a CSV.
# NOTE: this assignment shadows the `timezone` name imported from `datetime`
# at the top of the file; from here on `timezone` is the configured value.
timezone = snakemake.params["timezone"]
column_format = snakemake.params["column_format"]
fitbit_data_type = snakemake.params["fitbit_data_type"]
with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
local_start_date = pd.Timestamp(participant_file["FITBIT"]["START_DATE"])
# One day is added so the exclusive upper bound used below still covers END_DATE itself.
local_end_date = pd.Timestamp(participant_file["FITBIT"]["END_DATE"]) + pd.DateOffset(1)
if column_format == "JSON":
    json_raw = pd.read_csv(snakemake.input["raw_data"])
    parsed_data = parseHeartrateData(json_raw, fitbit_data_type)
elif column_format == "PLAIN_TEXT":
    # Already-tabular data: parse local_date_time and drop any timezone info it carries.
    parsed_data = pd.read_csv(snakemake.input["raw_data"], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
else:
    raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].")
# Discard summary rows whose resting heart rate is 0 (compared both as text and as number).
if fitbit_data_type == "summary":
    parsed_data = parsed_data[(parsed_data["heartrate_daily_restinghr"] != "0") & (parsed_data["heartrate_daily_restinghr"] != 0)]
# Only keep dates in the range of [local_start_date, local_end_date)
if not pd.isnull(local_start_date) and not pd.isnull(local_end_date):
    parsed_data = parsed_data.loc[(parsed_data["local_date_time"] >= local_start_date) & (parsed_data["local_date_time"] < local_end_date)]
if parsed_data.shape[0] > 0:
    # Localize to the configured timezone; local times that do not exist become NaT
    # and are dropped, and ambiguous times are resolved with ambiguous=False.
    # The integer division by 10**6 presumably converts nanoseconds to milliseconds
    # (assumes nanosecond-resolution datetimes; confirm for the pandas version in use).
    parsed_data["timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6
    # Rows dropped above get no timestamp on assignment; remove them.
    parsed_data.dropna(subset=['timestamp'], inplace=True)
parsed_data.to_csv(snakemake.output[0], index=False)

View File

@ -27,6 +27,26 @@ FITBIT_HEARTRATE_INTRADAY:
SCRIPTS: # List any python or r scripts that mutate your raw data SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py - src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py
FITBIT_SLEEP_SUMMARY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE
DEVICE_ID: device_id
LOCAL_DATE_TIME: FLAG_TO_MUTATE
LOCAL_START_DATE_TIME: FLAG_TO_MUTATE
LOCAL_END_DATE_TIME: FLAG_TO_MUTATE
EFFICIENCY: FLAG_TO_MUTATE
MINUTES_AFTER_WAKEUP: FLAG_TO_MUTATE
MINUTES_ASLEEP: FLAG_TO_MUTATE
MINUTES_AWAKE: FLAG_TO_MUTATE
MINUTES_TO_FALL_ASLEEP: FLAG_TO_MUTATE
MINUTES_IN_BED: FLAG_TO_MUTATE
IS_MAIN_SLEEP: FLAG_TO_MUTATE
TYPE: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_sleep_summary_json.py
FITBIT_STEPS_SUMMARY: FITBIT_STEPS_SUMMARY:
RAPIDS_COLUMN_MAPPINGS: RAPIDS_COLUMN_MAPPINGS:

View File

@ -0,0 +1,70 @@
import json, yaml
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import dateutil.parser
# Column names of the parsed sleep summary table. The order matches the row
# tuple built in parseOneSleepRecord. "local_date_time" is not listed here;
# main() adds that column afterwards.
SLEEP_SUMMARY_COLUMNS = ("device_id", "efficiency",
                         "minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
                         "is_main_sleep", "type",
                         "local_start_date_time", "local_end_date_time",
                         "timestamp")
# Parse one sleep episode; the caller supplies the episode type for both API versions
def parseOneSleepRecord(record, device_id, d_is_main_sleep, records_summary, episode_type):
    """Append the summary row of one sleep episode and return the list.

    Args:
        record: dict for one entry of the payload's "sleep" list.
        device_id: identifier copied into the first slot of the row.
        d_is_main_sleep: 1 for a main sleep episode, 0 for a nap.
        records_summary: list of row tuples; it is extended in place.
        episode_type: value stored in the "type" slot of the row.

    Returns:
        `records_summary` with one extra tuple ordered like
        SLEEP_SUMMARY_COLUMNS. The trailing 0 is a timestamp placeholder.
    """
    # "YYYY-MM-DDTHH:MM:SS" is 19 characters long. Slicing at 18 dropped the
    # last seconds digit, so e.g. ":30" was silently parsed as 3 seconds.
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")

    row_summary = (device_id, record["efficiency"],
                   record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"],
                   record["minutesToFallAsleep"], record["timeInBed"],
                   d_is_main_sleep, episode_type,
                   d_start_datetime, d_end_datetime,
                   0)
    records_summary.append(row_summary)
    return records_summary
def parseSleepData(sleep_data):
    """Flatten raw Fitbit sleep JSON rows into one row per sleep episode.

    Each value of the "json_fitbit_column" column is parsed as JSON, and
    payloads without a "sleep" key are skipped. The device id is taken from
    the first row only. Empty input yields an empty frame with
    SLEEP_SUMMARY_COLUMNS.
    """
    if sleep_data.empty:
        return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS)

    device_id = sleep_data["device_id"].iloc[0]
    episode_rows = []
    for raw_json in sleep_data["json_fitbit_column"]:
        payload = json.loads(raw_json)
        if "sleep" not in payload:
            continue
        for episode in payload["sleep"]:
            # 1 marks a main sleep episode, 0 marks a nap
            main_sleep_flag = int(bool(episode["isMainSleep"]))
            # API v1 records carry "awakeCount" and are labelled "classic";
            # v1.2 records provide their own "type"
            episode_type = "classic" if "awakeCount" in episode else episode['type']
            episode_rows = parseOneSleepRecord(episode, device_id, main_sleep_flag, episode_rows, episode_type)

    return pd.DataFrame(data=episode_rows, columns=SLEEP_SUMMARY_COLUMNS)
def main(json_raw, stream_parameters):
    """Mutate raw Fitbit sleep JSON into the sleep summary table.

    Args:
        json_raw: DataFrame holding the "device_id" and "json_fitbit_column" columns.
        stream_parameters: mapping with "SLEEP_SUMMARY_EPISODE_DAY_ANCHOR".
            "start" anchors each episode on its start time; any other value
            anchors it on its end time.

    Returns:
        The parsed DataFrame with start/end datetimes formatted as
        `%Y-%m-%d %H:%M:%S` strings, a zeroed "timestamp" column and a
        "local_date_time" column copied from the anchor column.
    """
    parsed_data = parseSleepData(json_raw)
    # this column is added at readable_datetime.R because we need to take into account multiple timezones
    parsed_data["timestamp"] = 0

    for column in ("local_start_date_time", "local_end_date_time"):
        # A frame without episodes has object-dtype columns, on which `.dt`
        # raises AttributeError; coercing first keeps that case working and
        # leaves real datetime columns unchanged.
        parsed_data[column] = pd.to_datetime(parsed_data[column]).dt.strftime('%Y-%m-%d %H:%M:%S')

    if stream_parameters["SLEEP_SUMMARY_EPISODE_DAY_ANCHOR"] == "start":
        parsed_data["local_date_time"] = parsed_data['local_start_date_time']
    else:
        parsed_data["local_date_time"] = parsed_data['local_end_date_time']

    return parsed_data

View File

@ -144,6 +144,21 @@ FITBIT_HEARTRATE_INTRADAY:
- HEARTRATE - HEARTRATE
- HEARTRATE_ZONE - HEARTRATE_ZONE
FITBIT_SLEEP_SUMMARY:
- TIMESTAMP
- DEVICE_ID
- LOCAL_DATE_TIME
- LOCAL_START_DATE_TIME
- LOCAL_END_DATE_TIME
- EFFICIENCY
- MINUTES_AFTER_WAKEUP
- MINUTES_ASLEEP
- MINUTES_AWAKE
- MINUTES_TO_FALL_ASLEEP
- MINUTES_IN_BED
- IS_MAIN_SLEEP
- TYPE
FITBIT_STEPS_SUMMARY: FITBIT_STEPS_SUMMARY:
- TIMESTAMP - TIMESTAMP
- DEVICE_ID - DEVICE_ID

View File

@ -832,9 +832,13 @@ properties:
type: string type: string
fitbitjson_mysql: fitbitjson_mysql:
type: object type: object
required: [DATABASE_GROUP, SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]
properties: properties:
DATABASE_GROUP: DATABASE_GROUP:
type: string type: string
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR:
type: string
enum: ["start", "end"]
FITBIT_DATA_YIELD: FITBIT_DATA_YIELD:
type: object type: object
@ -903,13 +907,10 @@ properties:
FITBIT_SLEEP_SUMMARY: FITBIT_SLEEP_SUMMARY:
type: object type: object
required: [TABLE, SLEEP_EPISODE_TIMESTAMP, PROVIDERS] required: [TABLE, PROVIDERS]
properties: properties:
TABLE: TABLE:
type: string type: string
SLEEP_EPISODE_TIMESTAMP:
type: string
enum: ["start", "end"]
PROVIDERS: PROVIDERS:
type: ["null", object] type: ["null", object]
properties: properties: