From 72f6b2d621a75a7a7ee5cfa557154c3c17c9a73d Mon Sep 17 00:00:00 2001 From: JulioV Date: Wed, 10 Mar 2021 10:07:52 -0500 Subject: [PATCH] Add steps intraday to fitbitjson_mysql --- Snakefile | 3 +- docs/datastreams/fitbitjson-mysql.md | 37 +++++++++++++++++++ docs/datastreams/mandatory-fitbit-format.md | 11 +++++- rules/features.smk | 4 +- src/data/streams/fitbitjson_mysql/format.yaml | 12 ++++++ .../fitbit/parse_steps_intraday_json.py | 5 ++- src/data/streams/rapids_columns.yaml | 6 +++ 7 files changed, 71 insertions(+), 7 deletions(-) diff --git a/Snakefile b/Snakefile index 56d89a6b..d8288f73 100644 --- a/Snakefile +++ b/Snakefile @@ -276,8 +276,7 @@ for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys(): for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys(): if config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_raw.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_parsed.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_parsed_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/fitbit_steps_intraday_features/fitbit_steps_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_steps_intraday.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) diff --git a/docs/datastreams/fitbitjson-mysql.md b/docs/datastreams/fitbitjson-mysql.md index 9028e7ea..b25ae2d8 100644 --- a/docs/datastreams/fitbitjson-mysql.md +++ 
b/docs/datastreams/fitbitjson-mysql.md @@ -83,3 +83,40 @@ If you want RAPIDS to process Fitbit sensor data using this stream, you will nee |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-08","value":"3201"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":14},{"time":"00:01:00","value":11},{"time":"00:02:00","value":10},...],"datasetInterval":1,"datasetType":"minute"}} |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-09","value":"998"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":0},{"time":"00:01:00","value":0},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}} +??? info "FITBIT_STEPS_INTRADAY" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | FLAG_TO_MUTATE | + | DEVICE_ID | device_id | + | LOCAL_DATE_TIME | FLAG_TO_MUTATE | + | STEPS | FLAG_TO_MUTATE | + + **MUTATION** + + - **COLUMN_MAPPINGS** + + | Script column | Stream column | + |-----------------|-----------------| + | JSON_FITBIT_COLUMN | fitbit_data | + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/fitbit/parse_steps_intraday_json.py + ``` + + !!! note + `TIMESTAMP`, `LOCAL_DATE_TIME`, and `STEPS` are parsed from `JSON_FITBIT_COLUMN`. `JSON_FITBIT_COLUMN` is a string column containing the JSON objects returned by [Fitbit's API](https://dev.fitbit.com/build/reference/web-api/activity/#get-activity-intraday-time-series). See an example of the raw data RAPIDS expects for this data stream: + + ??? 
example "Example of the expected raw data" + + |device_id |fitbit_data | + |---------------------------------------- |--------------------------------------------------------- | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-07","value":"1775"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":5},{"time":"00:01:00","value":3},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}} + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-08","value":"3201"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":14},{"time":"00:01:00","value":11},{"time":"00:02:00","value":10},...],"datasetInterval":1,"datasetType":"minute"}} + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-09","value":"998"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":0},{"time":"00:01:00","value":0},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}} + + \ No newline at end of file diff --git a/docs/datastreams/mandatory-fitbit-format.md b/docs/datastreams/mandatory-fitbit-format.md index 14545bb2..2c52bae6 100644 --- a/docs/datastreams/mandatory-fitbit-format.md +++ b/docs/datastreams/mandatory-fitbit-format.md @@ -21,4 +21,13 @@ This is a description of the format RAPIDS needs to process data for the followi | TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged | | LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` | | DEVICE_ID | A string that uniquely identifies a device | - | STEPS | Daily step count | \ No newline at end of file + | STEPS | Daily step count | + +??? 
info "FITBIT_STEPS_INTRADAY" + + | RAPIDS column | Description | + |-----------------|-----------------| + | TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged | + | LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` | + | DEVICE_ID | A string that uniquely identifies a device | + | STEPS | Intraday step count (usually every minute) | \ No newline at end of file diff --git a/rules/features.smk b/rules/features.smk index 07b3600e..a2b0045d 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -610,7 +610,7 @@ rule fitbit_steps_summary_r_features: rule fitbit_steps_intraday_python_features: input: - sensor_data = "data/raw/{pid}/fitbit_steps_intraday_parsed_with_datetime.csv", + sensor_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv", time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" params: provider = lambda wildcards: config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()], @@ -623,7 +623,7 @@ rule fitbit_steps_intraday_python_features: rule fitbit_steps_intraday_r_features: input: - sensor_data = "data/raw/{pid}/fitbit_steps_intraday_parsed_with_datetime.csv", + sensor_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv", time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" params: provider = lambda wildcards: config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()], diff --git a/src/data/streams/fitbitjson_mysql/format.yaml b/src/data/streams/fitbitjson_mysql/format.yaml index 55b05e58..5c055293 100644 --- a/src/data/streams/fitbitjson_mysql/format.yaml +++ b/src/data/streams/fitbitjson_mysql/format.yaml @@ -9,3 +9,15 @@ FITBIT_STEPS_SUMMARY: JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects SCRIPTS: # List any python or r scripts that mutate your raw data - src/data/streams/mutations/fitbit/parse_steps_summary_json.py + +FITBIT_STEPS_INTRADAY: + RAPIDS_COLUMN_MAPPINGS: + 
TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: FLAG_TO_MUTATE + STEPS: FLAG_TO_MUTATE + MUTATION: + COLUMN_MAPPINGS: + JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/parse_steps_intraday_json.py diff --git a/src/data/streams/mutations/fitbit/parse_steps_intraday_json.py b/src/data/streams/mutations/fitbit/parse_steps_intraday_json.py index 4194cc4e..f9ea13a4 100644 --- a/src/data/streams/mutations/fitbit/parse_steps_intraday_json.py +++ b/src/data/streams/mutations/fitbit/parse_steps_intraday_json.py @@ -13,7 +13,7 @@ def parseStepsData(steps_data): records = [] # Parse JSON into individual records - for record in steps_data.fitbit_data: + for record in steps_data.json_fitbit_column: record = json.loads(record) # Parse text into JSON if "activities-steps" in record.keys(): curr_date = datetime.strptime(record["activities-steps"][0]["dateTime"], "%Y-%m-%d") @@ -39,5 +39,6 @@ def parseStepsData(steps_data): def main(json_raw, stream_parameters): parsed_data = parseStepsData(json_raw) - parsed_data["timestamp"] = None # this column is added at readable_datetime.R because we neeed to take into account multiple timezones + parsed_data["timestamp"] = 0 # this column is added at readable_datetime.R because we need to take into account multiple timezones + parsed_data['local_date_time'] = parsed_data['local_date_time'].dt.strftime('%Y-%m-%d %H:%M:%S') return(parsed_data) diff --git a/src/data/streams/rapids_columns.yaml b/src/data/streams/rapids_columns.yaml index b3bfc45e..f97e9c79 100644 --- a/src/data/streams/rapids_columns.yaml +++ b/src/data/streams/rapids_columns.yaml @@ -98,6 +98,12 @@ FITBIT_STEPS_SUMMARY: - LOCAL_DATE_TIME - STEPS +FITBIT_STEPS_INTRADAY: + - TIMESTAMP + - DEVICE_ID + - LOCAL_DATE_TIME + - STEPS + EMPATICA_ACCELEROMETER: - TIMESTAMP - DEVICE_ID