diff --git a/Snakefile b/Snakefile index 32cb517d..6b753b00 100644 --- a/Snakefile +++ b/Snakefile @@ -244,8 +244,7 @@ for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys(): for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys(): if config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_summary.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) diff --git a/config.yaml b/config.yaml index 00207630..1c3abcc2 100644 --- a/config.yaml +++ b/config.yaml @@ -340,6 +340,7 @@ FITBIT_DATA_STREAMS: # AVAILABLE: fitbitjson_mysql: DATABASE_GROUP: MY_GROUP + SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. # Sensors ------ @@ -378,7 +379,6 @@ FITBIT_HEARTRATE_INTRADAY: # See https://www.rapids.science/latest/features/fitbit-sleep-summary/ FITBIT_SLEEP_SUMMARY: TABLE: sleep_summary - SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. 
PROVIDERS: RAPIDS: COMPUTE: False diff --git a/docs/datastreams/fitbitjson-mysql.md b/docs/datastreams/fitbitjson-mysql.md index a4b4e1c2..6b61741e 100644 --- a/docs/datastreams/fitbitjson-mysql.md +++ b/docs/datastreams/fitbitjson-mysql.md @@ -98,6 +98,53 @@ If you want RAPIDS to process Fitbit sensor data using this stream, you will nee |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}} |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}} +??? 
info "FITBIT_SLEEP_SUMMARY" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | FLAG_TO_MUTATE | + | LOCAL_DATE_TIME | FLAG_TO_MUTATE | + | LOCAL_START_DATE_TIME | FLAG_TO_MUTATE | + | LOCAL_END_DATE_TIME | FLAG_TO_MUTATE | + | DEVICE_ID | device_id | + | EFFICIENCY | FLAG_TO_MUTATE | + | MINUTES_AFTER_WAKEUP | FLAG_TO_MUTATE | + | MINUTES_ASLEEP | FLAG_TO_MUTATE | + | MINUTES_AWAKE | FLAG_TO_MUTATE | + | MINUTES_TO_FALL_ASLEEP | FLAG_TO_MUTATE | + | MINUTES_IN_BED | FLAG_TO_MUTATE | + | IS_MAIN_SLEEP | FLAG_TO_MUTATE | + | TYPE | FLAG_TO_MUTATE | + + **MUTATION** + + - **COLUMN_MAPPINGS** + + | Script column | Stream column | + |-----------------|-----------------| + | JSON_FITBIT_COLUMN | fitbit_data | + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/fitbit/parse_sleep_summary_json.py + ``` + + !!! note + + Fitbit's API has two versions for sleep data, v1 and v1.2. We support both but ignore v1's `count_awake`, `duration_awake`, `count_awakenings`, `count_restless`, and `duration_restless` columns. + + All columns except `DEVICE_ID` are parsed from `JSON_FITBIT_COLUMN`. `JSON_FITBIT_COLUMN` is a string column containing the JSON objects returned by Fitbit's API. See an example of the raw data RAPIDS expects for this data stream: + + ???
example "Example of the expected raw data" + + |device_id |fitbit_data | + |---------------------------------------- |--------------------------------------------------------- | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}} + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 
|{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}} + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}} ??? 
info "FITBIT_STEPS_SUMMARY" diff --git a/docs/datastreams/mandatory-fitbit-format.md b/docs/datastreams/mandatory-fitbit-format.md index 6394cff2..5b463b86 100644 --- a/docs/datastreams/mandatory-fitbit-format.md +++ b/docs/datastreams/mandatory-fitbit-format.md @@ -25,6 +25,24 @@ This is a description of the format RAPIDS needs to process data for the followi | HEARTRATE | Intraday heartrate | | HEARTRATE_ZONE | Heartrate [zone](https://help.fitbit.com/articles/en_US/Help_article/1565.htm#) that HEARTRATE belongs to. It is based on the heartrate zone ranges of each device | +??? info "FITBIT_SLEEP_SUMMARY" + + | RAPIDS column | Description | + |-----------------|-----------------| + | TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged | + | LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss`. It is a copy of either LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME, depending on which column is used to assign an episode to a specific day| + | LOCAL_START_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the start of a daily sleep episode | + | LOCAL_END_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the end of a daily sleep episode| + | DEVICE_ID | A string that uniquely identifies a device | + | EFFICIENCY | Sleep efficiency computed by Fitbit as time asleep / (total time in bed - time to fall asleep)| + | MINUTES_AFTER_WAKEUP | Minutes the participant spent in bed after waking up| + | MINUTES_ASLEEP | Minutes the participant was asleep | + | MINUTES_AWAKE | Minutes the participant was awake | + | MINUTES_TO_FALL_ASLEEP | Minutes the participant spent in bed before falling asleep| + | MINUTES_IN_BED | Minutes the participant spent in bed across the sleep episode| + | IS_MAIN_SLEEP | 0 if this episode is a nap, or 1 if it is a main sleep episode| + | TYPE | stages or classic [sleep data](https://dev.fitbit.com/build/reference/web-api/sleep/)| + ???
info "FITBIT_STEPS_SUMMARY" | RAPIDS column | Description | diff --git a/rules/features.smk b/rules/features.smk index d337771f..c13a4cd9 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -636,7 +636,7 @@ rule fitbit_steps_intraday_r_features: rule fitbit_sleep_summary_python_features: input: - sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", + sensor_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv", time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" params: provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()], @@ -649,7 +649,7 @@ rule fitbit_sleep_summary_python_features: rule fitbit_sleep_summary_r_features: input: - sensor_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv", + sensor_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv", time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" params: provider = lambda wildcards: config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()], diff --git a/src/data/fitbit_parse_heartrate.py b/src/data/fitbit_parse_heartrate.py deleted file mode 100644 index fffbeee5..00000000 --- a/src/data/fitbit_parse_heartrate.py +++ /dev/null @@ -1,161 +0,0 @@ -import yaml, json, sys -import pandas as pd -import numpy as np -from datetime import datetime, timezone -from math import trunc - - -HR_SUMMARY_COLUMNS = ("device_id", - "local_date_time", - "timestamp", - "heartrate_daily_restinghr", - "heartrate_daily_caloriesoutofrange", - "heartrate_daily_caloriesfatburn", - "heartrate_daily_caloriescardio", - "heartrate_daily_caloriespeak") - -HR_INTRADAY_COLUMNS = ("device_id", - "heartrate", - "heartrate_zone", - "local_date_time", - "timestamp") - -def parseHeartrateZones(heartrate_data): - # Get the range of heartrate zones: outofrange, fatburn, cardio, peak - # refer to: 
https://help.fitbit.com/articles/en_US/Help_article/1565 - - heartrate_fitbit_data = json.loads(heartrate_data["fitbit_data"].iloc[0])["activities-heart"][0] - # API Version X: not sure the exact version - if "heartRateZones" in heartrate_fitbit_data: - heartrate_zones = heartrate_fitbit_data["heartRateZones"] - # API VERSION Y: not sure the exact version - elif "value" in heartrate_fitbit_data: - heartrate_zones = heartrate_fitbit_data["value"]["heartRateZones"] - else: - raise ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed") - - heartrate_zones_range = {} - for hrzone in heartrate_zones: - heartrate_zones_range[hrzone["name"].lower().replace(" ", "")] = [hrzone["min"], hrzone["max"]] - return heartrate_zones_range - -def parseHeartrateSummaryData(record_summary, device_id, curr_date): - # API Version X: not sure the exact version - if "heartRateZones" in record_summary: - heartrate_zones = record_summary["heartRateZones"] - d_resting_heartrate = record_summary["value"] if "value" in record_summary else None - # API VERSION Y: not sure the exact version - elif "value" in record_summary: - heartrate_zones = record_summary["value"]["heartRateZones"] - d_resting_heartrate = record_summary["value"]["restingHeartRate"] if "restingHeartRate" in record_summary["value"] else None - else: - ValueError("Heartrate zone are stored in an unkown format, this could mean Fitbit's heartrate API changed") - - if "caloriesOut" in heartrate_zones[0]: - d_calories_outofrange = heartrate_zones[0]["caloriesOut"] - d_calories_fatburn = heartrate_zones[1]["caloriesOut"] - d_calories_cardio = heartrate_zones[2]["caloriesOut"] - d_calories_peak = heartrate_zones[3]["caloriesOut"] - else: - d_calories_outofrange, d_calories_fatburn, d_calories_cardio, d_calories_peak = None, None, None, None - - row_summary = (device_id, - curr_date, - 0, - d_resting_heartrate, - d_calories_outofrange, - d_calories_fatburn, - d_calories_cardio, - 
d_calories_peak) - return row_summary - - - - -def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range): - for data in dataset: - d_time = datetime.strptime(data["time"], '%H:%M:%S').time() - d_datetime = datetime.combine(curr_date, d_time) - d_hr = data["value"] - - # Get heartrate zone by range: min <= heartrate < max - d_hrzone = None - for hrzone, hrrange in heartrate_zones_range.items(): - if d_hr >= hrrange[0] and d_hr < hrrange[1]: - d_hrzone = hrzone - break - - row_intraday = (device_id, - d_hr, d_hrzone, - d_datetime, - 0) - - records_intraday.append(row_intraday) - return records_intraday - - - -def parseHeartrateData(heartrate_data, fitbit_data_type): - if heartrate_data.empty: - if fitbit_data_type == "summary": - return pd.DataFrame(columns=HR_SUMMARY_COLUMNS) - elif fitbit_data_type == "intraday": - return pd.DataFrame(columns=HR_INTRADAY_COLUMNS) - - device_id = heartrate_data["device_id"].iloc[0] - records_summary, records_intraday = [], [] - - heartrate_zones_range = parseHeartrateZones(heartrate_data) - - # Parse JSON into individual records - for record in heartrate_data.fitbit_data: - record = json.loads(record) # Parse text into JSON - curr_date = datetime.strptime(record["activities-heart"][0]["dateTime"], "%Y-%m-%d") - - if fitbit_data_type == "summary": - record_summary = record["activities-heart"][0] - row_summary = parseHeartrateSummaryData(record_summary, device_id, curr_date) - records_summary.append(row_summary) - - if fitbit_data_type == "intraday": - dataset = record["activities-heart-intraday"]["dataset"] - records_intraday = parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range) - - if fitbit_data_type == "summary": - parsed_data = pd.DataFrame(data=records_summary, columns=HR_SUMMARY_COLUMNS) - elif fitbit_data_type == "intraday": - parsed_data = pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS) - return parsed_data - - - 
-timezone = snakemake.params["timezone"] -column_format = snakemake.params["column_format"] -fitbit_data_type = snakemake.params["fitbit_data_type"] - -with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f: - participant_file = yaml.safe_load(f) -local_start_date = pd.Timestamp(participant_file["FITBIT"]["START_DATE"]) -local_end_date = pd.Timestamp(participant_file["FITBIT"]["END_DATE"]) + pd.DateOffset(1) - -if column_format == "JSON": - json_raw = pd.read_csv(snakemake.input["raw_data"]) - parsed_data = parseHeartrateData(json_raw, fitbit_data_type) -elif column_format == "PLAIN_TEXT": - parsed_data = pd.read_csv(snakemake.input["raw_data"], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None)) -else: - raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].") - -# discard rows with restinghr = 0 -if fitbit_data_type == "summary": - parsed_data = parsed_data[(parsed_data["heartrate_daily_restinghr"] != "0") & (parsed_data["heartrate_daily_restinghr"] != 0)] - -# Only keep dates in the range of [local_start_date, local_end_date) -if not pd.isnull(local_start_date) and not pd.isnull(local_end_date): - parsed_data = parsed_data.loc[(parsed_data["local_date_time"] >= local_start_date) & (parsed_data["local_date_time"] < local_end_date)] - -if parsed_data.shape[0] > 0: - parsed_data["timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6 - parsed_data.dropna(subset=['timestamp'], inplace=True) - -parsed_data.to_csv(snakemake.output[0], index=False) diff --git a/src/data/streams/fitbitjson_mysql/format.yaml b/src/data/streams/fitbitjson_mysql/format.yaml index a9fd88da..ac022c05 100644 --- a/src/data/streams/fitbitjson_mysql/format.yaml +++ b/src/data/streams/fitbitjson_mysql/format.yaml @@ -27,6 +27,26 @@ FITBIT_HEARTRATE_INTRADAY: SCRIPTS: # List any python or r scripts that mutate your raw data - 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py +FITBIT_SLEEP_SUMMARY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: FLAG_TO_MUTATE + LOCAL_START_DATE_TIME: FLAG_TO_MUTATE + LOCAL_END_DATE_TIME: FLAG_TO_MUTATE + EFFICIENCY: FLAG_TO_MUTATE + MINUTES_AFTER_WAKEUP: FLAG_TO_MUTATE + MINUTES_ASLEEP: FLAG_TO_MUTATE + MINUTES_AWAKE: FLAG_TO_MUTATE + MINUTES_TO_FALL_ASLEEP: FLAG_TO_MUTATE + MINUTES_IN_BED: FLAG_TO_MUTATE + IS_MAIN_SLEEP: FLAG_TO_MUTATE + TYPE: FLAG_TO_MUTATE + MUTATION: + COLUMN_MAPPINGS: + JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/parse_sleep_summary_json.py FITBIT_STEPS_SUMMARY: RAPIDS_COLUMN_MAPPINGS: diff --git a/src/data/streams/mutations/fitbit/parse_sleep_summary_json.py b/src/data/streams/mutations/fitbit/parse_sleep_summary_json.py new file mode 100644 index 00000000..121901d7 --- /dev/null +++ b/src/data/streams/mutations/fitbit/parse_sleep_summary_json.py
@@ -0,0 +1,92 @@
+import json, yaml
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+import dateutil.parser
+
+SLEEP_SUMMARY_COLUMNS = ("device_id", "efficiency",
+                         "minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
+                         "is_main_sleep", "type",
+                         "local_start_date_time", "local_end_date_time",
+                         "timestamp")
+
+# Fitbit reports local date times like "2020-10-10T15:36:30.000": the first 19 characters hold everything up to the seconds
+FITBIT_DATETIME_LENGTH = 19
+FITBIT_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
+RAPIDS_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
+
+
+def _parseFitbitDatetime(fitbit_datetime):
+    """Return the datetime (whole seconds, no timezone) encoded in one of Fitbit's local date time strings."""
+    return datetime.strptime(fitbit_datetime[:FITBIT_DATETIME_LENGTH], FITBIT_DATETIME_FORMAT)
+
+
+def parseOneSleepRecord(record, device_id, d_is_main_sleep, records_summary, episode_type):
+    """Append the summary row of one sleep episode to records_summary and return that list.
+
+    record: one element of the "sleep" list of a parsed Fitbit sleep JSON object
+    device_id: value for the device_id column
+    d_is_main_sleep: 1 for a main sleep episode, 0 for a nap
+    records_summary: list of row tuples ordered as SLEEP_SUMMARY_COLUMNS; it is modified in place
+    episode_type: value for the type column ("classic" or the record's own "type")
+    """
+    d_start_datetime = _parseFitbitDatetime(record["startTime"])
+    d_end_datetime = _parseFitbitDatetime(record["endTime"])
+    # Summary data, the trailing 0 is a placeholder for the timestamp column
+    row_summary = (device_id, record["efficiency"],
+                   record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
+                   d_is_main_sleep, episode_type,
+                   d_start_datetime, d_end_datetime,
+                   0)
+
+    records_summary.append(row_summary)
+
+    return records_summary
+
+
+def parseSleepData(sleep_data):
+    """Return one row per sleep episode found in the JSON strings of sleep_data.json_fitbit_column.
+
+    The result has the columns listed in SLEEP_SUMMARY_COLUMNS. It has no rows when sleep_data is empty
+    or none of its JSON objects has a "sleep" key. Every row gets the device_id of the first row of sleep_data.
+    """
+    if sleep_data.empty:
+        return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS)
+
+    device_id = sleep_data["device_id"].iloc[0]
+    records_summary = []
+    # Parse JSON into individual records
+    for multi_record in sleep_data.json_fitbit_column:
+        sleep_record = json.loads(multi_record)
+        if "sleep" in sleep_record:
+            for record in sleep_record["sleep"]:
+                # Whether the sleep episode is nap (0) or main sleep (1)
+                d_is_main_sleep = 1 if record["isMainSleep"] else 0
+                # Records with an "awakeCount" key come from sleep API version 1 and are labelled classic, version 1.2 records carry their own type
+                episode_type = "classic" if "awakeCount" in record else record["type"]
+                records_summary = parseOneSleepRecord(record, device_id, d_is_main_sleep, records_summary, episode_type)
+
+    return pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS)
+
+
+def main(json_raw, stream_parameters):
+    """Parse the raw Fitbit sleep JSON rows into the sleep summary columns.
+
+    json_raw: data frame with the device_id and json_fitbit_column columns
+    stream_parameters: dictionary with the SLEEP_SUMMARY_EPISODE_DAY_ANCHOR key; "start" copies
+        local_start_date_time into local_date_time, any other value copies local_end_date_time
+    Returns the parsed data frame with its date times formatted as yyyy-mm-dd hh:mm:ss strings.
+    """
+    parsed_data = parseSleepData(json_raw)
+    parsed_data["timestamp"] = 0 # this column is added at readable_datetime.R because we need to take into account multiple timezones
+    # Without episodes these columns are empty and not datetime-typed, pd.to_datetime keeps the .dt accessor usable in that case too
+    for date_time_column in ("local_start_date_time", "local_end_date_time"):
+        parsed_data[date_time_column] = pd.to_datetime(parsed_data[date_time_column]).dt.strftime(RAPIDS_DATETIME_FORMAT)
+
+    if stream_parameters["SLEEP_SUMMARY_EPISODE_DAY_ANCHOR"] == "start":
+        parsed_data["local_date_time"] = parsed_data['local_start_date_time']
+    else:
+        parsed_data["local_date_time"] = parsed_data['local_end_date_time']
+
+    return(parsed_data)
diff --git a/src/data/streams/rapids_columns.yaml b/src/data/streams/rapids_columns.yaml index af64dbdd..99fc2377 100644 --- a/src/data/streams/rapids_columns.yaml +++
b/src/data/streams/rapids_columns.yaml @@ -144,6 +144,21 @@ FITBIT_HEARTRATE_INTRADAY: - HEARTRATE - HEARTRATE_ZONE +FITBIT_SLEEP_SUMMARY: + - TIMESTAMP + - DEVICE_ID + - LOCAL_DATE_TIME + - LOCAL_START_DATE_TIME + - LOCAL_END_DATE_TIME + - EFFICIENCY + - MINUTES_AFTER_WAKEUP + - MINUTES_ASLEEP + - MINUTES_AWAKE + - MINUTES_TO_FALL_ASLEEP + - MINUTES_IN_BED + - IS_MAIN_SLEEP + - TYPE + FITBIT_STEPS_SUMMARY: - TIMESTAMP - DEVICE_ID diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index 192e9707..b6dedbc5 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -832,9 +832,13 @@ properties: type: string fitbitjson_mysql: type: object + required: [DATABASE_GROUP, SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] properties: DATABASE_GROUP: type: string + SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: + type: string + enum: ["start", "end"] FITBIT_DATA_YIELD: type: object @@ -903,13 +907,10 @@ properties: FITBIT_SLEEP_SUMMARY: type: object - required: [TABLE, SLEEP_EPISODE_TIMESTAMP, PROVIDERS] + required: [TABLE, PROVIDERS] properties: TABLE: type: string - SLEEP_EPISODE_TIMESTAMP: - type: string - enum: ["start", "end"] PROVIDERS: type: ["null", object] properties: