Add sleep summary to fitbitjson_mysql
parent
cf0afeb08d
commit
a420f5ef92
|
@ -244,8 +244,7 @@ for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
|
|||
for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
|
|
|
@ -340,6 +340,7 @@ FITBIT_DATA_STREAMS:
|
|||
# AVAILABLE:
|
||||
fitbitjson_mysql:
|
||||
DATABASE_GROUP: MY_GROUP
|
||||
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
||||
|
||||
# Sensors ------
|
||||
|
||||
|
@ -378,7 +379,6 @@ FITBIT_HEARTRATE_INTRADAY:
|
|||
# See https://www.rapids.science/latest/features/fitbit-sleep-summary/
|
||||
FITBIT_SLEEP_SUMMARY:
|
||||
TABLE: sleep_summary
|
||||
SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
|
|
|
@ -98,6 +98,53 @@ If you want RAPIDS to process Fitbit sensor data using this stream, you will nee
|
|||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
|
||||
|
||||
??? info "FITBIT_SLEEP_SUMMARY"
|
||||
|
||||
**RAPIDS_COLUMN_MAPPINGS**
|
||||
|
||||
| RAPIDS column | Stream column |
|
||||
|-----------------|-----------------|
|
||||
| TIMESTAMP | FLAG_TO_MUTATE |
|
||||
| LOCAL_DATE_TIME | FLAG_TO_MUTATE |
|
||||
| LOCAL_START_DATE_TIME | FLAG_TO_MUTATE |
|
||||
| LOCAL_END_DATE_TIME | FLAG_TO_MUTATE |
|
||||
| DEVICE_ID | device_id |
|
||||
| EFFICIENCY | FLAG_TO_MUTATE |
|
||||
| MINUTES_AFTER_WAKEUP | FLAG_TO_MUTATE |
|
||||
| MINUTES_ASLEEP | FLAG_TO_MUTATE |
|
||||
| MINUTES_AWAKE | FLAG_TO_MUTATE |
|
||||
| MINUTES_TO_FALL_ASLEEP | FLAG_TO_MUTATE |
|
||||
| MINUTES_IN_BED | FLAG_TO_MUTATE |
|
||||
| IS_MAIN_SLEEP | FLAG_TO_MUTATE |
|
||||
| TYPE | FLAG_TO_MUTATE |
|
||||
|
||||
**MUTATION**
|
||||
|
||||
- **COLUMN_MAPPINGS**
|
||||
|
||||
| Script column | Stream column |
|
||||
|-----------------|-----------------|
|
||||
| JSON_FITBIT_COLUMN | fitbit_data |
|
||||
|
||||
- **SCRIPTS**
|
||||
|
||||
```bash
|
||||
src/data/streams/mutations/fitbit/parse_sleep_summary_json.py
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1's `count_awake`, `duration_awake`, `count_awakenings`, `count_restless`, and `duration_restless` columns.
|
||||
|
||||
All columns except `DEVICE_ID` are parsed from `JSON_FITBIT_COLUMN`. `JSON_FITBIT_COLUMN` is a string column containing the JSON objects returned by Fitbit's API. See an example of the raw data RAPIDS expects for this data stream:
|
||||
|
||||
??? example "Example of the expected raw data"
|
||||
|
||||
|device_id |fitbit_data |
|
||||
|---------------------------------------- |--------------------------------------------------------- |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}}
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}}
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}}
|
||||
|
||||
??? info "FITBIT_STEPS_SUMMARY"
|
||||
|
||||
|
|
|
@ -25,6 +25,24 @@ This is a description of the format RAPIDS needs to process data for the followi
|
|||
| HEARTRATE | Intraday heartrate |
|
||||
| HEARTRATE_ZONE | Heartrate [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) that HEARTRATE belongs to. It is based on the heartrate zone ranges of each device |
|
||||
|
||||
??? info "FITBIT_SLEEP_SUMMARY"
|
||||
|
||||
| RAPIDS column | Description |
|
||||
|-----------------|-----------------|
|
||||
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged |
|
||||
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss`; this is a copy of either LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME, depending on which column is used to assign an episode to a specific day |
|
||||
| LOCAL_START_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the start of a daily sleep episode |
|
||||
| LOCAL_END_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the end of a daily sleep episode|
|
||||
| DEVICE_ID | A string that uniquely identifies a device |
|
||||
| EFFICIENCY | Sleep efficiency computed by fitbit as time asleep / (total time in bed - time to fall asleep)|
|
||||
| MINUTES_AFTER_WAKEUP | Minutes the participant spent in bed after waking up|
|
||||
| MINUTES_ASLEEP | Minutes the participant was asleep |
|
||||
| MINUTES_AWAKE | Minutes the participant was awake |
|
||||
| MINUTES_TO_FALL_ASLEEP | Minutes the participant spent in bed before falling asleep|
|
||||
| MINUTES_IN_BED | Minutes the participant spent in bed across the sleep episode|
|
||||
| IS_MAIN_SLEEP | 0 if this episode is a nap, or 1 if it is a main sleep episode|
|
||||
| TYPE | stages or classic [sleep data](https://dev.fitbit.com/build/reference/web-api/sleep/)|
|
||||
|
||||
??? info "FITBIT_STEPS_SUMMARY"
|
||||
|
||||
| RAPIDS column | Description |
|
||||
|
|
|
@ -636,7 +636,7 @@ rule fitbit_steps_intraday_r_features:
|
|||
|
||||
rule fitbit_sleep_summary_python_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
|
||||
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
||||
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
|
@ -649,7 +649,7 @@ rule fitbit_sleep_summary_python_features:
|
|||
|
||||
rule fitbit_sleep_summary_r_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv",
|
||||
sensor_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
||||
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
|
|
|
@ -1,161 +0,0 @@
|
|||
import yaml, json, sys
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from datetime import datetime, timezone
|
||||
from math import trunc
|
||||
|
||||
|
||||
# Column names, in order, of the table built from daily heart rate summary rows.
HR_SUMMARY_COLUMNS = (
    "device_id",
    "local_date_time",
    "timestamp",
    "heartrate_daily_restinghr",
    "heartrate_daily_caloriesoutofrange",
    "heartrate_daily_caloriesfatburn",
    "heartrate_daily_caloriescardio",
    "heartrate_daily_caloriespeak",
)
|
||||
|
||||
# Column names, in order, of the table built from intraday heart rate rows.
HR_INTRADAY_COLUMNS = (
    "device_id",
    "heartrate",
    "heartrate_zone",
    "local_date_time",
    "timestamp",
)
|
||||
|
||||
def parseHeartrateZones(heartrate_data):
    """Return the heart rate zone boundaries found in the first raw record.

    Only the first row of the ``fitbit_data`` column is read; its JSON text is
    parsed and the first ``activities-heart`` entry is inspected.

    Args:
        heartrate_data: table with a ``fitbit_data`` column of JSON strings.

    Returns:
        dict mapping each zone name (lowercased, spaces removed, e.g.
        ``"outofrange"``) to a ``[min, max]`` list.

    Raises:
        ValueError: if the entry has neither a ``heartRateZones`` nor a
            ``value`` key.
    """
    # Zones are outofrange, fatburn, cardio and peak, see:
    # https://help.fitbit.com/articles/en_US/Help_article/1565
    first_entry = json.loads(heartrate_data["fitbit_data"].iloc[0])["activities-heart"][0]

    if "heartRateZones" in first_entry:
        # Layout where the zones sit directly on the daily entry (exact API version unknown)
        zones = first_entry["heartRateZones"]
    elif "value" in first_entry:
        # Layout where the zones are nested under "value" (exact API version unknown)
        zones = first_entry["value"]["heartRateZones"]
    else:
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    return {
        zone["name"].lower().replace(" ", ""): [zone["min"], zone["max"]]
        for zone in zones
    }
|
||||
|
||||
def parseHeartrateSummaryData(record_summary, device_id, curr_date):
    """Flatten one daily ``activities-heart`` entry into a summary row.

    Args:
        record_summary: dict for a single day, in either of the two layouts
            handled below.
        device_id: identifier copied into the row unchanged.
        curr_date: date of the record, copied into the row unchanged.

    Returns:
        tuple ``(device_id, curr_date, 0, resting_heartrate,
        calories_outofrange, calories_fatburn, calories_cardio,
        calories_peak)``. The ``0`` is a placeholder for the timestamp.
        Calories are ``None`` when the first zone has no ``caloriesOut`` key.

    Raises:
        ValueError: if the entry has neither a ``heartRateZones`` nor a
            ``value`` key.
    """
    if "heartRateZones" in record_summary:
        # Layout where the zones sit directly on the daily entry (exact API version unknown)
        heartrate_zones = record_summary["heartRateZones"]
        d_resting_heartrate = record_summary.get("value")
    elif "value" in record_summary:
        # Layout where zones and resting heart rate are nested under "value" (exact API version unknown)
        summary_value = record_summary["value"]
        heartrate_zones = summary_value["heartRateZones"]
        d_resting_heartrate = summary_value.get("restingHeartRate")
    else:
        # The exception must be raised: merely constructing it let execution fall
        # through to an UnboundLocalError on heartrate_zones below.
        raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed")

    if "caloriesOut" in heartrate_zones[0]:
        # Zones are read by position: out of range, fat burn, cardio, peak
        calories = tuple(heartrate_zones[position]["caloriesOut"] for position in range(4))
    else:
        calories = (None, None, None, None)

    return (device_id, curr_date, 0, d_resting_heartrate, *calories)
|
||||
|
||||
|
||||
|
||||
|
||||
def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range):
    """Append one row per intraday heart rate sample to ``records_intraday``.

    Args:
        records_intraday: list that receives the new rows (extended in place).
        dataset: iterable of dicts with a ``time`` (``HH:MM:SS``) and a
            ``value`` (heart rate) entry.
        device_id: identifier copied into every row.
        curr_date: date the samples belong to; combined with each sample time.
        heartrate_zones_range: dict mapping zone name to ``[min, max]``.

    Returns:
        The same ``records_intraday`` list, whose new rows are
        ``(device_id, heartrate, zone, datetime, 0)``. The ``0`` is a
        placeholder for the timestamp.
    """
    for sample in dataset:
        sample_time = datetime.strptime(sample["time"], '%H:%M:%S').time()
        sample_datetime = datetime.combine(curr_date, sample_time)
        bpm = sample["value"]

        # First zone whose range satisfies min <= heartrate < max; None when no zone matches
        zone_name = next(
            (
                name
                for name, bounds in heartrate_zones_range.items()
                if bpm >= bounds[0] and bpm < bounds[1]
            ),
            None,
        )

        records_intraday.append((device_id, bpm, zone_name, sample_datetime, 0))

    return records_intraday
|
||||
|
||||
|
||||
|
||||
def parseHeartrateData(heartrate_data, fitbit_data_type):
    """Parse raw Fitbit heart rate JSON rows into a summary or intraday table.

    Args:
        heartrate_data: table with ``device_id`` and ``fitbit_data`` columns;
            each ``fitbit_data`` value is the JSON text of one day.
        fitbit_data_type: ``"summary"`` or ``"intraday"``. No other value is
            handled explicitly by this function.

    Returns:
        A DataFrame with ``HR_SUMMARY_COLUMNS`` or ``HR_INTRADAY_COLUMNS``
        depending on ``fitbit_data_type``; it has no rows when the input is
        empty.
    """
    wants_summary = fitbit_data_type == "summary"
    wants_intraday = fitbit_data_type == "intraday"

    if heartrate_data.empty:
        if wants_summary:
            return pd.DataFrame(columns=HR_SUMMARY_COLUMNS)
        if wants_intraday:
            return pd.DataFrame(columns=HR_INTRADAY_COLUMNS)

    # The device id and the zone boundaries are both taken from the first row
    device_id = heartrate_data["device_id"].iloc[0]
    summary_rows, intraday_rows = [], []
    zone_ranges = parseHeartrateZones(heartrate_data)

    for raw_json in heartrate_data.fitbit_data:
        day = json.loads(raw_json)  # JSON text -> dict
        day_entry = day["activities-heart"][0]
        curr_date = datetime.strptime(day_entry["dateTime"], "%Y-%m-%d")

        if wants_summary:
            summary_rows.append(parseHeartrateSummaryData(day_entry, device_id, curr_date))

        if wants_intraday:
            intraday_rows = parseHeartrateIntradayData(
                intraday_rows,
                day["activities-heart-intraday"]["dataset"],
                device_id,
                curr_date,
                zone_ranges,
            )

    if wants_summary:
        parsed_data = pd.DataFrame(data=summary_rows, columns=HR_SUMMARY_COLUMNS)
    elif wants_intraday:
        parsed_data = pd.DataFrame(data=intraday_rows, columns=HR_INTRADAY_COLUMNS)
    return parsed_data
|
||||
|
||||
|
||||
|
||||
# Script body: `snakemake` is provided by Snakemake when it runs this file.
# NOTE: `timezone` below rebinds the name imported from `datetime` at the top of the file.
timezone, column_format, fitbit_data_type = (
    snakemake.params[key] for key in ("timezone", "column_format", "fitbit_data_type")
)

# The participant file provides the date window in which data is kept
with open(snakemake.input["participant_file"], "r", encoding="utf-8") as participant_handle:
    participant_file = yaml.safe_load(participant_handle)
fitbit_dates = participant_file["FITBIT"]
local_start_date = pd.Timestamp(fitbit_dates["START_DATE"])
# One day is added so that END_DATE itself falls inside the half-open range used below
local_end_date = pd.Timestamp(fitbit_dates["END_DATE"]) + pd.DateOffset(1)

if column_format == "JSON":
    json_raw = pd.read_csv(snakemake.input["raw_data"])
    parsed_data = parseHeartrateData(json_raw, fitbit_data_type)
elif column_format == "PLAIN_TEXT":
    parsed_data = pd.read_csv(
        snakemake.input["raw_data"],
        parse_dates=["local_date_time"],
        date_parser=lambda col: pd.to_datetime(col).tz_localize(None),
    )
else:
    raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].")

# Discard summary rows whose resting heart rate is zero, stored either as text or as a number
if fitbit_data_type == "summary":
    resting_hr = parsed_data["heartrate_daily_restinghr"]
    parsed_data = parsed_data[(resting_hr != "0") & (resting_hr != 0)]

# Only keep dates in the range [local_start_date, local_end_date)
if not pd.isnull(local_start_date) and not pd.isnull(local_end_date):
    on_or_after_start = parsed_data["local_date_time"] >= local_start_date
    before_end = parsed_data["local_date_time"] < local_end_date
    parsed_data = parsed_data.loc[on_or_after_start & before_end]

if len(parsed_data) > 0:
    # Local times that do not exist in the time zone become NaT and are dropped;
    # the remaining values are converted from nanoseconds to milliseconds
    localized = parsed_data["local_date_time"].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT")
    parsed_data["timestamp"] = localized.dropna().astype(np.int64) // 10**6
    parsed_data = parsed_data.dropna(subset=['timestamp'])

parsed_data.to_csv(snakemake.output[0], index=False)
|
|
@ -27,6 +27,26 @@ FITBIT_HEARTRATE_INTRADAY:
|
|||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py
|
||||
|
||||
FITBIT_SLEEP_SUMMARY:
|
||||
RAPIDS_COLUMN_MAPPINGS:
|
||||
TIMESTAMP: FLAG_TO_MUTATE
|
||||
DEVICE_ID: device_id
|
||||
LOCAL_DATE_TIME: FLAG_TO_MUTATE
|
||||
LOCAL_START_DATE_TIME: FLAG_TO_MUTATE
|
||||
LOCAL_END_DATE_TIME: FLAG_TO_MUTATE
|
||||
EFFICIENCY: FLAG_TO_MUTATE
|
||||
MINUTES_AFTER_WAKEUP: FLAG_TO_MUTATE
|
||||
MINUTES_ASLEEP: FLAG_TO_MUTATE
|
||||
MINUTES_AWAKE: FLAG_TO_MUTATE
|
||||
MINUTES_TO_FALL_ASLEEP: FLAG_TO_MUTATE
|
||||
MINUTES_IN_BED: FLAG_TO_MUTATE
|
||||
IS_MAIN_SLEEP: FLAG_TO_MUTATE
|
||||
TYPE: FLAG_TO_MUTATE
|
||||
MUTATION:
|
||||
COLUMN_MAPPINGS:
|
||||
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/parse_sleep_summary_json.py
|
||||
|
||||
FITBIT_STEPS_SUMMARY:
|
||||
RAPIDS_COLUMN_MAPPINGS:
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
import json, yaml
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from datetime import datetime, timedelta
|
||||
import dateutil.parser
|
||||
|
||||
# Column names, in order, of the table built from parsed sleep summary rows.
SLEEP_SUMMARY_COLUMNS = (
    "device_id",
    "efficiency",
    "minutes_after_wakeup",
    "minutes_asleep",
    "minutes_awake",
    "minutes_to_fall_asleep",
    "minutes_in_bed",
    "is_main_sleep",
    "type",
    "local_start_date_time",
    "local_end_date_time",
    "timestamp",
)
|
||||
|
||||
|
||||
# Parse one sleep episode; the fields read here are the same for records tagged "classic" and for v1.2 records
def parseOneSleepRecord(record, device_id, d_is_main_sleep, records_summary, episode_type):
    """Append the summary row of one sleep episode to ``records_summary``.

    Args:
        record: dict of a single sleep episode with ``startTime``, ``endTime``,
            ``efficiency``, ``minutesAfterWakeup``, ``minutesAsleep``,
            ``minutesAwake``, ``minutesToFallAsleep`` and ``timeInBed`` keys.
        device_id: identifier copied into the row.
        d_is_main_sleep: value stored in the ``is_main_sleep`` column.
        records_summary: list that receives the new row (extended in place).
        episode_type: value stored in the ``type`` column.

    Returns:
        The same ``records_summary`` list with one row appended. The last
        element of the row is a ``0`` placeholder for the timestamp.
    """
    # Times look like "2020-10-10T15:36:30.000": the first 19 characters are the full
    # "YYYY-MM-DDTHH:MM:SS" part. Cutting at 18 dropped the last digit of the seconds,
    # so ":30" was parsed as 3 seconds.
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")

    # Summary data, in SLEEP_SUMMARY_COLUMNS order
    row_summary = (
        device_id,
        record["efficiency"],
        record["minutesAfterWakeup"],
        record["minutesAsleep"],
        record["minutesAwake"],
        record["minutesToFallAsleep"],
        record["timeInBed"],
        d_is_main_sleep,
        episode_type,
        d_start_datetime,
        d_end_datetime,
        0,
    )
    records_summary.append(row_summary)

    return records_summary
|
||||
|
||||
|
||||
|
||||
def parseSleepData(sleep_data):
    """Parse raw Fitbit sleep JSON rows into a table of sleep episodes.

    Args:
        sleep_data: table with ``device_id`` and ``json_fitbit_column``
            columns; each ``json_fitbit_column`` value is JSON text that may
            contain a ``sleep`` list of episodes.

    Returns:
        A DataFrame with ``SLEEP_SUMMARY_COLUMNS`` and one row per episode
        (no rows when the input is empty).
    """
    if sleep_data.empty:
        return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS)

    # The device id is taken from the first row
    device_id = sleep_data["device_id"].iloc[0]
    records_summary = []

    for raw_json in sleep_data.json_fitbit_column:
        payload = json.loads(raw_json)
        if "sleep" not in payload:
            continue  # nothing to parse in this row

        for episode in payload["sleep"]:
            # 1 for a main sleep episode, 0 for a nap
            is_main_sleep = 1 if episode["isMainSleep"] else 0
            # Records carrying "awakeCount" come from sleep API v1 and are tagged
            # "classic"; v1.2 records carry their own type
            episode_type = "classic" if "awakeCount" in episode else episode['type']
            records_summary = parseOneSleepRecord(episode, device_id, is_main_sleep, records_summary, episode_type)

    return pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS)
|
||||
|
||||
def main(json_raw, stream_parameters):
    """Turn raw Fitbit sleep JSON rows into the sleep summary table.

    Args:
        json_raw: raw table handed to ``parseSleepData``.
        stream_parameters: mapping with a ``SLEEP_SUMMARY_EPISODE_DAY_ANCHOR``
            entry; ``"start"`` anchors each episode to its start time and any
            other value anchors it to its end time.

    Returns:
        The parsed DataFrame with ``local_start_date_time`` and
        ``local_end_date_time`` formatted as ``YYYY-MM-DD HH:MM:SS`` strings,
        ``timestamp`` set to 0, and a ``local_date_time`` column copied from
        the anchor column.
    """
    parsed_data = parseSleepData(json_raw)
    # Placeholder: the real value is added at readable_datetime.R because multiple
    # time zones need to be taken into account
    parsed_data["timestamp"] = 0

    for column in ("local_start_date_time", "local_end_date_time"):
        # With no parsed episodes these columns are object dtype and `.dt` raises
        # AttributeError; pd.to_datetime gives them a datetime dtype and leaves
        # columns that already hold datetimes unchanged.
        parsed_data[column] = pd.to_datetime(parsed_data[column]).dt.strftime('%Y-%m-%d %H:%M:%S')

    if stream_parameters["SLEEP_SUMMARY_EPISODE_DAY_ANCHOR"] == "start":
        anchor_column = "local_start_date_time"
    else:
        anchor_column = "local_end_date_time"
    parsed_data["local_date_time"] = parsed_data[anchor_column]

    return parsed_data
|
|
@ -144,6 +144,21 @@ FITBIT_HEARTRATE_INTRADAY:
|
|||
- HEARTRATE
|
||||
- HEARTRATE_ZONE
|
||||
|
||||
FITBIT_SLEEP_SUMMARY:
|
||||
- TIMESTAMP
|
||||
- DEVICE_ID
|
||||
- LOCAL_DATE_TIME
|
||||
- LOCAL_START_DATE_TIME
|
||||
- LOCAL_END_DATE_TIME
|
||||
- EFFICIENCY
|
||||
- MINUTES_AFTER_WAKEUP
|
||||
- MINUTES_ASLEEP
|
||||
- MINUTES_AWAKE
|
||||
- MINUTES_TO_FALL_ASLEEP
|
||||
- MINUTES_IN_BED
|
||||
- IS_MAIN_SLEEP
|
||||
- TYPE
|
||||
|
||||
FITBIT_STEPS_SUMMARY:
|
||||
- TIMESTAMP
|
||||
- DEVICE_ID
|
||||
|
|
|
@ -832,9 +832,13 @@ properties:
|
|||
type: string
|
||||
fitbitjson_mysql:
|
||||
type: object
|
||||
required: [DATABASE_GROUP, SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]
|
||||
properties:
|
||||
DATABASE_GROUP:
|
||||
type: string
|
||||
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR:
|
||||
type: string
|
||||
enum: ["start", "end"]
|
||||
|
||||
FITBIT_DATA_YIELD:
|
||||
type: object
|
||||
|
@ -903,13 +907,10 @@ properties:
|
|||
|
||||
FITBIT_SLEEP_SUMMARY:
|
||||
type: object
|
||||
required: [TABLE, SLEEP_EPISODE_TIMESTAMP, PROVIDERS]
|
||||
required: [TABLE, PROVIDERS]
|
||||
properties:
|
||||
TABLE:
|
||||
type: string
|
||||
SLEEP_EPISODE_TIMESTAMP:
|
||||
type: string
|
||||
enum: ["start", "end"]
|
||||
PROVIDERS:
|
||||
type: ["null", object]
|
||||
properties:
|
||||
|
|
Loading…
Reference in New Issue