From 9a276c1c669665b5e5f79271458ebf7e586280c5 Mon Sep 17 00:00:00 2001
From: JulioV
Date: Wed, 10 Mar 2021 09:54:40 -0500
Subject: [PATCH] Add steps summary to jsonfitbit_mysql

---
 docs/datastreams/fitbitjson-mysql.md        | 54 ++++++++++++++-----
 docs/datastreams/mandatory-fitbit-format.md | 17 ++++--
 .../fitbit/parse_steps_intraday_json.py     | 43 +++++++++++++++
 3 files changed, 98 insertions(+), 16 deletions(-)
 create mode 100644 src/data/streams/mutations/fitbit/parse_steps_intraday_json.py

diff --git a/docs/datastreams/fitbitjson-mysql.md b/docs/datastreams/fitbitjson-mysql.md
index abb9d516..9028e7ea 100644
--- a/docs/datastreams/fitbitjson-mysql.md
+++ b/docs/datastreams/fitbitjson-mysql.md
@@ -1,8 +1,8 @@
 # `fitbitjson_mysql`
-This [data stream](../../datastreams/data-streams-introduction) handles Fitbit sensor data downloaded using the [Fitbit Web API](https://dev.fitbit.com/build/reference/web-api/) and stored in a MySQL database. Please note that RAPIDS cannot query the API directly, you need to use other available tools or implement your own. Once you have your sensor data in a MySQL database, RAPIDS can process it.
+This [data stream](../../datastreams/data-streams-introduction) handles Fitbit sensor data downloaded using the [Fitbit Web API](https://dev.fitbit.com/build/reference/web-api/) and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your sensor data in a MySQL database, RAPIDS can process it.
 
 ## Container
-A MySQL database with a table per sensor, each containing the data for all participants.
+The container should be a MySQL database with a table per sensor, each containing the data for all participants.
 
 The script to connect and download data from this container is at:
 ```bash
@@ -17,16 +17,9 @@ The `format.yaml` maps and transforms columns in your raw data stream to the [ma
 src/data/streams/fitbitjson_mysql/format.yaml
 ```
 
-If you want RAPIDS to process Fitbit sensor data using this stream, you will need to replace the following `RAPIDS_COLUMN_MAPPINGS` inside **each sensor** section in `format.yaml` to match your raw data column names:
+If you want RAPIDS to process Fitbit sensor data using this stream, you will need to replace `[RAPIDS_COLUMN_MAPPINGS]`/`[MUTATION][COLUMN_MAPPINGS]` inside **each sensor** section in `format.yaml` to match your raw data column names:
 
-| Column | Description |
-|-----------------|-----------------|
-| device_id | A string that uniquely identifies a device |
-| fitbit_data | A string column that contains the JSON objects downloaded from Fitbit's API |
-
-
-
-??? info "FITBIT_HEARTRATE_SUMMARY section"
+??? info "FITBIT_HEARTRATE_SUMMARY"
info "FITBIT_HEARTRATE_SUMMARY" **RAPIDS_COLUMN_MAPPINGS** @@ -52,4 +45,41 @@ If you want RAPIDS to process Fitbit sensor data using this stream, you will nee |---------------------------------------- |--------------------------------------------------------- | |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-07","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1200.6102,"max":88,"min":31,"minutes":1058,"name":"Out of Range"},{"caloriesOut":760.3020,"max":120,"min":86,"minutes":366,"name":"Fat Burn"},{"caloriesOut":15.2048,"max":146,"min":120,"minutes":2,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":72}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":68},{"time":"00:01:00","value":67},{"time":"00:02:00","value":67},...],"datasetInterval":1,"datasetType":"minute"}} |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}} \ No newline at end of file + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}} + +??? info "FITBIT_STEPS_SUMMARY" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | FLAG_TO_MUTATE | + | DEVICE_ID | device_id | + | LOCAL_DATE_TIME | FLAG_TO_MUTATE | + | STEPS | FLAG_TO_MUTATE | + + **MUTATION** + + - **COLUMN_MAPPINGS** + + | Script column | Stream column | + |-----------------|-----------------| + | JSON_FITBIT_COLUMN | fitbit_data | + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/fitbit/parse_steps_summary_json.py + ``` + + !!! note + `TIMESTAMP`, `LOCAL_DATE_TIME`, and `STEPS` are parsed from `JSON_FITBIT_COLUMN`. 
diff --git a/docs/datastreams/mandatory-fitbit-format.md b/docs/datastreams/mandatory-fitbit-format.md
index 13d33b11..14545bb2 100644
--- a/docs/datastreams/mandatory-fitbit-format.md
+++ b/docs/datastreams/mandatory-fitbit-format.md
@@ -1,15 +1,24 @@
 # Mandatory Fitbit Format
 
-This is a description of the format RAPIDS needs to process data for the following PHONE sensors.
+This is a description of the format RAPIDS needs to process data for the following Fitbit sensors.
 
 ??? info "FITBIT_HEARTRATE_SUMMARY"
 
     | RAPIDS column | Description |
     |-----------------|-----------------|
-    | LOCAL_DATE_TIME | TODO |
-    | DEVICE_ID | TODO |
+    | LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` |
+    | DEVICE_ID | A string that uniquely identifies a device |
     | HEARTRATE_DAILY_RESTINGHR | TODO |
     | HEARTRATE_DAILY_CALORIESOUTOFRANGE | TODO |
     | HEARTRATE_DAILY_CALORIESFATBURN | TODO |
     | HEARTRATE_DAILY_CALORIESCARDIO | TODO |
-    | HEARTRATE_DAILY_CALORIESPEAK | TODO |
\ No newline at end of file
+    | HEARTRATE_DAILY_CALORIESPEAK | TODO |
+
+??? info "FITBIT_STEPS_SUMMARY"
+
+    | RAPIDS column | Description |
+    |-----------------|-----------------|
+    | TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged |
+    | LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` |
+    | DEVICE_ID | A string that uniquely identifies a device |
+    | STEPS | Daily step count |
\ No newline at end of file
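One detail from the tables above worth spelling out: a 13-digit `TIMESTAMP` is a UNIX epoch time in milliseconds, and computing it from `LOCAL_DATE_TIME` requires knowing the participant's time zone, which is why the parsing scripts leave it empty for RAPIDS to fill in later. A minimal sketch, assuming a hypothetical `America/New_York` participant:

```python
from datetime import datetime
from zoneinfo import ZoneInfo

# Convert a mandatory-format LOCAL_DATE_TIME string into a 13-digit (millisecond)
# UNIX timestamp under an assumed participant time zone.
local_date_time = "2020-10-07 00:01:00"
tz = ZoneInfo("America/New_York")  # hypothetical time zone, for illustration only

dt = datetime.strptime(local_date_time, "%Y-%m-%d %H:%M:%S").replace(tzinfo=tz)
timestamp = int(dt.timestamp() * 1000)
print(timestamp)  # 1602043260000 -> 13 digits
```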
info "FITBIT_STEPS_SUMMARY" + + | RAPIDS column | Description | + |-----------------|-----------------| + | TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged | + | LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` | + | DEVICE_ID | A string that uniquely identifies a device | + | STEPS | Daily step count | \ No newline at end of file diff --git a/src/data/streams/mutations/fitbit/parse_steps_intraday_json.py b/src/data/streams/mutations/fitbit/parse_steps_intraday_json.py new file mode 100644 index 00000000..4194cc4e --- /dev/null +++ b/src/data/streams/mutations/fitbit/parse_steps_intraday_json.py @@ -0,0 +1,43 @@ +import json +import pandas as pd +from datetime import datetime + +STEPS_COLUMNS = ("device_id", "steps", "local_date_time", "timestamp") + + +def parseStepsData(steps_data): + if steps_data.empty: + return pd.DataFrame(columns=STEPS_COLUMNS) + + device_id = steps_data["device_id"].iloc[0] + records = [] + + # Parse JSON into individual records + for record in steps_data.fitbit_data: + record = json.loads(record) # Parse text into JSON + if "activities-steps" in record.keys(): + curr_date = datetime.strptime(record["activities-steps"][0]["dateTime"], "%Y-%m-%d") + + # Parse intraday data + if "activities-steps-intraday" in record.keys(): + dataset = record["activities-steps-intraday"]["dataset"] + for data in dataset: + d_time = datetime.strptime(data["time"], '%H:%M:%S').time() + d_datetime = datetime.combine(curr_date, d_time) + + row_intraday = (device_id, + data["value"], + d_datetime, + 0) + + records.append(row_intraday) + + parsed_data = pd.DataFrame(data=records, columns=STEPS_COLUMNS) + + return parsed_data + + +def main(json_raw, stream_parameters): + parsed_data = parseStepsData(json_raw) + parsed_data["timestamp"] = None # this column is added at readable_datetime.R because we neeed to take into account multiple timezones + return(parsed_data)