From d529490999567264820a335220fe72b405670c79 Mon Sep 17 00:00:00 2001 From: Meng Li <34143965+Meng6@users.noreply.github.com> Date: Fri, 12 Mar 2021 12:38:36 -0500 Subject: [PATCH] Migrate fitbit features to new data stream --- Snakefile | 5 +- docs/features/fitbit-data-yield.md | 3 +- docs/features/fitbit-heartrate-intraday.md | 28 +--------- docs/features/fitbit-heartrate-summary.md | 28 +--------- docs/features/fitbit-sleep-intraday.md | 53 ++----------------- docs/features/fitbit-sleep-summary.md | 50 +---------------- docs/features/fitbit-steps-intraday.md | 28 +--------- docs/features/fitbit-steps-summary.md | 28 +--------- rules/features.smk | 12 ++--- src/data/datetime/readable_datetime.R | 2 +- src/data/streams/pull_phone_data.R | 2 +- src/data/streams/pull_wearable_data.R | 2 +- .../episodes/sleep_intraday_episodes.py | 37 +++++++++++++ 13 files changed, 63 insertions(+), 215 deletions(-) create mode 100644 src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py diff --git a/Snakefile b/Snakefile index e076c0af..862d14f7 100644 --- a/Snakefile +++ b/Snakefile @@ -217,8 +217,7 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys(): for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys(): if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_data_yield.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") @@ -253,6 +252,8 @@ for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys(): for provider in config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"].keys(): if config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) diff --git a/docs/features/fitbit-data-yield.md b/docs/features/fitbit-data-yield.md index 0caa6793..0f0751d1 100644 --- a/docs/features/fitbit-data-yield.md +++ b/docs/features/fitbit-data-yield.md @@ -23,8 +23,7 @@ Before explaining the data yield features, let's define the following relevant c !!! info "File Sequence" ```bash - data/raw/{pid}/fitbit_heartrate_intraday_raw.csv - - data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv - - data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv + - data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv - data/interim/{pid}/fitbit_data_yield_features/fitbit_data_yield_{language}_{provider_key}.csv - data/processed/features/{pid}/fitbit_data_yield.csv ``` diff --git a/docs/features/fitbit-heartrate-intraday.md b/docs/features/fitbit-heartrate-intraday.md index d1bd4196..6338d863 100644 --- a/docs/features/fitbit-heartrate-intraday.md +++ b/docs/features/fitbit-heartrate-intraday.md @@ -4,30 +4,7 @@ Sensor parameters description for `[FITBIT_HEARTRATE_INTRADAY]`: |Key                              | Description | |----------------|----------------------------------------------------------------------------------------------------------------------------------- -|`[TABLE]`| Database table name or file path where the heart rate intraday data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file. - -The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. - -We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity). - -??? example "Example of the structure of source data" - - === "JSON" - - |device_id |fitbit_data | - |---------------------------------------- |--------------------------------------------------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-07","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1200.6102,"max":88,"min":31,"minutes":1058,"name":"Out of Range"},{"caloriesOut":760.3020,"max":120,"min":86,"minutes":366,"name":"Fat Burn"},{"caloriesOut":15.2048,"max":146,"min":120,"minutes":2,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":72}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":68},{"time":"00:01:00","value":67},{"time":"00:02:00","value":67},...],"datasetInterval":1,"datasetType":"minute"}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}} - - === "PLAIN_TEXT" - All columns are mandatory, however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just have in mind that some features will be empty if some of these columns are empty. - - |device_id |local_date_time |heartrate |heartrate_zone | - |-------------------------------------- |---------------------- |--------- |--------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:00:00 |68 |outofrange | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:01:00 |67 |outofrange | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:02:00 |67 |outofrange | +|`[CONTAINER]`| Container where your heart rate intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. | ## RAPIDS provider @@ -38,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam !!! info "File Sequence" ```bash - data/raw/{pid}/fitbit_heartrate_intraday_raw.csv - - data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv - - data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv + - data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv - data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv - data/processed/features/{pid}/fitbit_heartrate_intraday.csv ``` diff --git a/docs/features/fitbit-heartrate-summary.md b/docs/features/fitbit-heartrate-summary.md index 3a0f341b..ebb7aa41 100644 --- a/docs/features/fitbit-heartrate-summary.md +++ b/docs/features/fitbit-heartrate-summary.md @@ -4,30 +4,7 @@ Sensor parameters description for `[FITBIT_HEARTRATE_SUMMARY]`: |Key                              | Description | |----------------|----------------------------------------------------------------------------------------------------------------------------------- -|`[TABLE]`| Database table name or file path where the heart rate summary data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file. - -The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. - -We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity). - -??? example "Example of the structure of source data" - - === "JSON" - - |device_id |fitbit_data | - |---------------------------------------- |--------------------------------------------------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-07","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1200.6102,"max":88,"min":31,"minutes":1058,"name":"Out of Range"},{"caloriesOut":760.3020,"max":120,"min":86,"minutes":366,"name":"Fat Burn"},{"caloriesOut":15.2048,"max":146,"min":120,"minutes":2,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":72}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":68},{"time":"00:01:00","value":67},{"time":"00:02:00","value":67},...],"datasetInterval":1,"datasetType":"minute"}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}} - - === "PLAIN_TEXT" - All columns are mandatory, however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just have in mind that some features will be empty if some of these columns are empty. - - |device_id |local_date_time |heartrate_daily_restinghr |heartrate_daily_caloriesoutofrange |heartrate_daily_caloriesfatburn |heartrate_daily_caloriescardio |heartrate_daily_caloriespeak | - |-------------------------------------- |----------------- |------- |-------------- |------------- |------------ |-------| - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 |72 |1200.6102 |760.3020 |15.2048 |0 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-08 |70 |1100.1120 |660.0012 |23.7088 |0 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-09 |69 |750.3615 |734.1516 |131.8579 |0 | +|`[CONTAINER]`| Container where your heart rate summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. | ## RAPIDS provider @@ -38,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam !!! info "File Sequence" ```bash - data/raw/{pid}/fitbit_heartrate_summary_raw.csv - - data/raw/{pid}/fitbit_heartrate_summary_parsed.csv - - data/raw/{pid}/fitbit_heartrate_summary_parsed_with_datetime.csv + - data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv - data/interim/{pid}/fitbit_heartrate_summary_features/fitbit_heartrate_summary_{language}_{provider_key}.csv - data/processed/features/{pid}/fitbit_heartrate_summary.csv ``` diff --git a/docs/features/fitbit-sleep-intraday.md b/docs/features/fitbit-sleep-intraday.md index ec679850..9661eb2e 100644 --- a/docs/features/fitbit-sleep-intraday.md +++ b/docs/features/fitbit-sleep-intraday.md @@ -4,55 +4,7 @@ Sensor parameters description for `[FITBIT_SLEEP_INTRADAY]`: |Key                              | Description | |----------------|----------------------------------------------------------------------------------------------------------------------------------- -|`[TABLE]`| Database table name or file path where the sleep intraday data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file. - - -The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. - -We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity). - -??? example "Example of the structure of source data with Fitbit’s sleep API Version 1" - - === "JSON" - - |device_id |fitbit_data | - |---------------------------------------- |--------------------------------------------------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep": [{"awakeCount": 2, "awakeDuration": 3, "awakeningsCount": 10, "dateOfSleep": "2020-10-07", "duration": 8100000, "efficiency": 91, "endTime": "2020-10-07T18:10:00.000", "isMainSleep": true, "logId": 14147921940, "minuteData": [{"dateTime": "15:55:00", "value": "3"}, {"dateTime": "15:56:00", "value": "3"}, {"dateTime": "15:57:00", "value": "2"},...], "minutesAfterWakeup": 0, "minutesAsleep": 123, "minutesAwake": 12, "minutesToFallAsleep": 0, "restlessCount": 8, "restlessDuration": 9, "startTime": "2020-10-07T15:55:00.000", "timeInBed": 135}, {"awakeCount": 0, "awakeDuration": 0, "awakeningsCount": 1, "dateOfSleep": "2020-10-07", "duration": 3780000, "efficiency": 100, "endTime": "2020-10-07T10:52:30.000", "isMainSleep": false, "logId": 14144903977, "minuteData": [{"dateTime": "09:49:00", "value": "1"}, {"dateTime": "09:50:00", "value": "1"}, {"dateTime": "09:51:00", "value": "1"},...], "minutesAfterWakeup": 1, "minutesAsleep": 62, "minutesAwake": 0, "minutesToFallAsleep": 0, "restlessCount": 1, "restlessDuration": 1, "startTime": "2020-10-07T09:49:00.000", "timeInBed": 63}], "summary": {"totalMinutesAsleep": 185, "totalSleepRecords": 2, "totalTimeInBed": 198}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep": [{"awakeCount": 3, "awakeDuration": 21, "awakeningsCount": 16, "dateOfSleep": "2020-10-08", "duration": 19260000, "efficiency": 89, "endTime": "2020-10-08T06:01:30.000", "isMainSleep": true, "logId": 14150613895, "minuteData": [{"dateTime": "00:40:00", "value": "3"}, {"dateTime": "00:41:00", "value": "3"}, {"dateTime": "00:42:00", "value": "3"},...], "minutesAfterWakeup": 0, "minutesAsleep": 275, "minutesAwake": 33, "minutesToFallAsleep": 0, "restlessCount": 13, "restlessDuration": 25, "startTime": "2020-10-08T00:40:00.000", "timeInBed": 321}], "summary": {"totalMinutesAsleep": 275, "totalSleepRecords": 1, "totalTimeInBed": 321}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep": [{"awakeCount": 1, "awakeDuration": 3, "awakeningsCount": 8, "dateOfSleep": "2020-10-09", "duration": 19320000, "efficiency": 96, "endTime": "2020-10-09T05:57:30.000", "isMainSleep": true, "logId": 14161136803, "minuteData": [{"dateTime": "00:35:30", "value": "2"}, {"dateTime": "00:36:30", "value": "1"}, {"dateTime": "00:37:30", "value": "1"},...], "minutesAfterWakeup": 0, "minutesAsleep": 309, "minutesAwake": 13, "minutesToFallAsleep": 0, "restlessCount": 7, "restlessDuration": 10, "startTime": "2020-10-09T00:35:30.000", "timeInBed": 322}], "summary": {"totalMinutesAsleep": 309, "totalSleepRecords": 1, "totalTimeInBed": 322}} - - === "PLAIN_TEXT" - - All columns are mandatory, however, all except `device_id`, `local_date_time` and `duration` can be empty if you don't have that data. Just have in mind that some features might be inaccurate or empty as `type_episode_id`, `level`, `is_main_sleep`, and `type` are used for sleep episodes extraction. `type_episode_id` is based on where it is extracted: if it is extracted from the 1st "minutesData" block, the `type_episode_id` field will be 0. Similarly, the kth block will be k-1. Actually, you only need to make sure rows extracted from the same "minutesData" block are assigned with the same unique `type_episode_id` value. - - |device_id |type_episode_id |local_date_time |duration |level |is_main_sleep |type | - |------------------------------------ |---------------- |------------------- |--------- |---------- |-------------- |-------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-07 15:55:00 |60 |awake |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-07 15:56:00 |60 |awake |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-07 15:57:00 |60 |restless |0 |classic | - -??? example "Example of the structure of source data with Fitbit’s sleep API Version 1.2" - - === "JSON" - - |device_id |fitbit_data | - |---------------------------------------- |--------------------------------------------------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}} - - === "PLAIN_TEXT" - - All columns are mandatory, however, all except `device_id`, `local_date_time` and `duration` can be empty if you don't have that data. Just have in mind that some features might be inaccurate or empty as `type_episode_id`, `level`, `is_main_sleep`, and `type` are used for sleep episodes extraction. `type_episode_id` is based on where it is extracted: if it is extracted from the 1st "data" and "shortData" block, the `type_episode_id` field will be 0. Similarly, the kth block will be k-1. Actually, you only need to make sure rows extracted from the same "minutesData" block are assigned with the same unique `type_episode_id` value. - - |device_id |type_episode_id |local_date_time |duration |level |is_main_sleep |type | - |------------------------------------ |---------------- |------------------- |--------- |---------- |-------------- |-------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:36:30 |60 |restless |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:37:30 |660 |asleep |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:48:30 |60 |restless |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |... |... |... |... |... |... | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |1 |2020-10-10 01:46:30 |420 |light |1 |stages | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |1 |2020-10-10 01:53:30 |1230 |deep |1 |stages | +|`[CONTAINER]`| Container where your sleep intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. | ## RAPIDS provider @@ -62,7 +14,8 @@ We provide examples of the input format that RAPIDS expects, note that both exam !!! info "File Sequence" ```bash - data/raw/{pid}/fitbit_sleep_intraday_raw.csv - - data/raw/{pid}/fitbit_sleep_intraday_parsed.csv + - data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv + - data/interim/{pid}/fitbit_sleep_intraday_episodes.csv - data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv - data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv - data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_{language}_{provider_key}.csv diff --git a/docs/features/fitbit-sleep-summary.md b/docs/features/fitbit-sleep-summary.md index 8e7f3834..515cb6d8 100644 --- a/docs/features/fitbit-sleep-summary.md +++ b/docs/features/fitbit-sleep-summary.md @@ -4,52 +4,7 @@ Sensor parameters description for `[FITBIT_SLEEP_SUMMARY]`: |Key                              | Description | |----------------|----------------------------------------------------------------------------------------------------------------------------------- -|`[TABLE]`| Database table name or file path where the sleep summary data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file. - -The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. - -We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity). - -??? example "Example of the structure of source data with Fitbit’s sleep API Version 1" - - === "JSON" - - |device_id |fitbit_data | - |---------------------------------------- |--------------------------------------------------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep": [{"awakeCount": 2, "awakeDuration": 3, "awakeningsCount": 10, "dateOfSleep": "2020-10-07", "duration": 8100000, "efficiency": 91, "endTime": "2020-10-07T18:10:00.000", "isMainSleep": true, "logId": 14147921940, "minuteData": [{"dateTime": "15:55:00", "value": "3"}, {"dateTime": "15:56:00", "value": "3"}, {"dateTime": "15:57:00", "value": "2"},...], "minutesAfterWakeup": 0, "minutesAsleep": 123, "minutesAwake": 12, "minutesToFallAsleep": 0, "restlessCount": 8, "restlessDuration": 9, "startTime": "2020-10-07T15:55:00.000", "timeInBed": 135}, {"awakeCount": 0, "awakeDuration": 0, "awakeningsCount": 1, "dateOfSleep": "2020-10-07", "duration": 3780000, "efficiency": 100, "endTime": "2020-10-07T10:52:30.000", "isMainSleep": false, "logId": 14144903977, "minuteData": [{"dateTime": "09:49:00", "value": "1"}, {"dateTime": "09:50:00", "value": "1"}, {"dateTime": "09:51:00", "value": "1"},...], "minutesAfterWakeup": 1, "minutesAsleep": 62, "minutesAwake": 0, "minutesToFallAsleep": 0, "restlessCount": 1, "restlessDuration": 1, "startTime": "2020-10-07T09:49:00.000", "timeInBed": 63}], "summary": {"totalMinutesAsleep": 185, "totalSleepRecords": 2, "totalTimeInBed": 198}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep": [{"awakeCount": 3, "awakeDuration": 21, "awakeningsCount": 16, "dateOfSleep": "2020-10-08", "duration": 19260000, "efficiency": 89, "endTime": "2020-10-08T06:01:30.000", "isMainSleep": true, "logId": 14150613895, "minuteData": [{"dateTime": "00:40:00", "value": "3"}, {"dateTime": "00:41:00", "value": "3"}, {"dateTime": "00:42:00", "value": "3"},...], "minutesAfterWakeup": 0, "minutesAsleep": 275, "minutesAwake": 33, "minutesToFallAsleep": 0, "restlessCount": 13, "restlessDuration": 25, "startTime": "2020-10-08T00:40:00.000", "timeInBed": 321}], "summary": {"totalMinutesAsleep": 275, "totalSleepRecords": 1, "totalTimeInBed": 321}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep": [{"awakeCount": 1, "awakeDuration": 3, "awakeningsCount": 8, "dateOfSleep": "2020-10-09", "duration": 19320000, "efficiency": 96, "endTime": "2020-10-09T05:57:30.000", "isMainSleep": true, "logId": 14161136803, "minuteData": [{"dateTime": "00:35:30", "value": "2"}, {"dateTime": "00:36:30", "value": "1"}, {"dateTime": "00:37:30", "value": "1"},...], "minutesAfterWakeup": 0, "minutesAsleep": 309, "minutesAwake": 13, "minutesToFallAsleep": 0, "restlessCount": 7, "restlessDuration": 10, "startTime": "2020-10-09T00:35:30.000", "timeInBed": 322}], "summary": {"totalMinutesAsleep": 309, "totalSleepRecords": 1, "totalTimeInBed": 322}} - - === "PLAIN_TEXT" - - All columns are mandatory, however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just have in mind that some features will be empty if some of these columns are empty. - - |device_id |local_start_date_time |local_end_date_time |efficiency |minutes_after_wakeup |minutes_asleep |minutes_awake |minutes_to_fall_asleep |minutes_in_bed |is_main_sleep |type |count_awake |duration_awake |count_awakenings |count_restless |duration_restless | - |-------------------------------------- |---------------------- |---------------------- |----------- |--------------------- |--------------- |-------------- |----------------------- |--------------- |-------------- |-------- |----------- |--------------- |----------------- |--------------- |------------------ | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 15:55:00 |2020-10-07 18:10:00 |91 |0 |123 |12 |0 |135 |1 |classic |2 |3 |10 |8 |9 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 09:49:00 |2020-10-07 10:52:30 |100 |1 |62 |0 |0 |63 |0 |classic |0 |0 |1 |1 |1 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-08 00:40:00 |2020-10-08 06:01:30 |89 |0 |275 |33 |0 |321 |1 |classic |3 |21 |16 |13 |25 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-09 00:35:30 |2020-10-09 05:57:30 |96 |0 |309 |13 |0 |322 |1 |classic |1 |3 |8 |7 |10 | - -??? example "Example of the structure of source data with Fitbit’s sleep API Version 1.2" - - === "JSON" - - |device_id |fitbit_data | - |---------------------------------------- |--------------------------------------------------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}} - - === "PLAIN_TEXT" - All columns are mandatory, however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just have in mind that some features will be empty if some of these columns are empty. - - |device_id |local_start_date_time |local_end_date_time |efficiency |minutes_after_wakeup |minutes_asleep |minutes_awake |minutes_to_fall_asleep |minutes_in_bed |is_main_sleep |type | - |-------------------------------------- |---------------------- |---------------------- |----------- |--------------------- |--------------- |-------------- |----------------------- |--------------- |-------------- |-------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-10 15:36:30 |2020-10-10 16:37:00 |92 |0 |55 |5 |0 |60 |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-10 01:46:30 |2020-10-10 08:10:00 |88 |0 |318 |65 |0 |383 |1 |stages | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-11 00:12:30 |2020-10-11 11:47:00 |89 |1 |562 |132 |0 |694 |1 |stages | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-12 01:31:00 |2020-10-12 09:34:30 |93 |0 |415 |68 |0 |483 |1 |stages | +|`[CONTAINER]`| Container where your sleep summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. | ## RAPIDS provider @@ -60,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam !!! info "File Sequence" ```bash - data/raw/{pid}/fitbit_sleep_summary_raw.csv - - data/raw/{pid}/fitbit_sleep_summary_parsed.csv - - data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv + - data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv - data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv - data/processed/features/{pid}/fitbit_sleep_summary.csv ``` diff --git a/docs/features/fitbit-steps-intraday.md b/docs/features/fitbit-steps-intraday.md index 63f22dbe..4b9ab6f9 100644 --- a/docs/features/fitbit-steps-intraday.md +++ b/docs/features/fitbit-steps-intraday.md @@ -4,30 +4,7 @@ Sensor parameters description for `[FITBIT_STEPS_INTRADAY]`: |Key                              | Description | |----------------|----------------------------------------------------------------------------------------------------------------------------------- -|`[TABLE]`| Database table name or file path where the steps intraday data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file. - -The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. - -We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity). - -??? example "Example of the structure of source data" - - === "JSON" - - |device_id |fitbit_data | - |---------------------------------------- |--------------------------------------------------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-07","value":"1775"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":5},{"time":"00:01:00","value":3},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-08","value":"3201"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":14},{"time":"00:01:00","value":11},{"time":"00:02:00","value":10},...],"datasetInterval":1,"datasetType":"minute"}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-09","value":"998"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":0},{"time":"00:01:00","value":0},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}} - - === "PLAIN_TEXT" - All columns are mandatory. - - |device_id |local_date_time |steps | - |-------------------------------------- |---------------------- |--------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:00:00 |5 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:01:00 |3 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:02:00 |0 | +|`[CONTAINER]`| Container where your steps intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. | ## RAPIDS provider @@ -38,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam !!! info "File Sequence" ```bash - data/raw/{pid}/fitbit_steps_intraday_raw.csv - - data/raw/{pid}/fitbit_steps_intraday_parsed.csv - - data/raw/{pid}/fitbit_steps_intraday_parsed_with_datetime.csv + - data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv - data/interim/{pid}/fitbit_steps_intraday_features/fitbit_steps_intraday_{language}_{provider_key}.csv - data/processed/features/{pid}/fitbit_steps_intraday.csv ``` diff --git a/docs/features/fitbit-steps-summary.md b/docs/features/fitbit-steps-summary.md index eca48336..452b9405 100644 --- a/docs/features/fitbit-steps-summary.md +++ b/docs/features/fitbit-steps-summary.md @@ -4,30 +4,7 @@ Sensor parameters description for `[FITBIT_STEPS_SUMMARY]`: |Key                              | Description | |----------------|----------------------------------------------------------------------------------------------------------------------------------- -|`[TABLE]`| Database table name or file path where the steps summary data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file. - -The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. - -We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity). - -??? example "Example of the structure of source data" - - === "JSON" - - |device_id |fitbit_data | - |---------------------------------------- |--------------------------------------------------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-07","value":"1775"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":5},{"time":"00:01:00","value":3},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-08","value":"3201"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":14},{"time":"00:01:00","value":11},{"time":"00:02:00","value":10},...],"datasetInterval":1,"datasetType":"minute"}} - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |"activities-steps":[{"dateTime":"2020-10-09","value":"998"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":0},{"time":"00:01:00","value":0},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}} - - === "PLAIN_TEXT" - All columns are mandatory. - - |device_id |local_date_time |steps | - |-------------------------------------- |---------------------- |--------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 |1775 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-08 |3201 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-09 |998 | +|`[CONTAINER]`| Container where your steps summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. | ## RAPIDS provider @@ -38,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam !!! info "File Sequence" ```bash - data/raw/{pid}/fitbit_steps_summary_raw.csv - - data/raw/{pid}/fitbit_steps_summary_parsed.csv - - data/raw/{pid}/fitbit_steps_summary_parsed_with_datetime.csv + - data/raw/{pid}/fitbit_steps_summary_with_datetime.csv - data/interim/{pid}/fitbit_steps_summary_features/fitbit_steps_summary_{language}_{provider_key}.csv - data/processed/features/{pid}/fitbit_steps_summary.csv ``` diff --git a/rules/features.smk b/rules/features.smk index c13a4cd9..a2105145 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -506,7 +506,7 @@ rule phone_wifi_visible_r_features: rule fitbit_data_yield_python_features: input: - sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", + sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" params: provider = lambda wildcards: config["FITBIT_DATA_YIELD"]["PROVIDERS"][wildcards.provider_key.upper()], @@ -519,7 +519,7 @@ rule fitbit_data_yield_python_features: rule fitbit_data_yield_r_features: input: - sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", + sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" params: provider = lambda wildcards: config["FITBIT_DATA_YIELD"]["PROVIDERS"][wildcards.provider_key.upper()], @@ -660,13 +660,13 @@ rule fitbit_sleep_summary_r_features: script: "../src/features/entry.R" -rule resample_sleep_episodes: +rule sleep_intraday_episodes: input: - "data/raw/{pid}/fitbit_sleep_intraday_parsed.csv" + sleep_intraday = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv" output: - "data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv" + "data/interim/{pid}/fitbit_sleep_intraday_episodes.csv" script: - "../src/features/utils/resample_episodes.R" + "../src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py" rule fitbit_sleep_intraday_python_features: input: diff --git a/src/data/datetime/readable_datetime.R b/src/data/datetime/readable_datetime.R index 117c94e6..86e57b95 100644 --- a/src/data/datetime/readable_datetime.R +++ b/src/data/datetime/readable_datetime.R @@ -49,7 +49,7 @@ validate_user_timezones <- function(timezone_parameters){ } create_mising_temporal_column <- function(data, device_type){ - if(device_type == "fitbit"){ + if(device_type == "fitbit" && all(data$timestamp == 0)){ # For fibit we infere timestamp from Fitbit's local date time if(nrow(data) == 0) return(data %>% mutate(timestamp = NA_real_)) diff --git a/src/data/streams/pull_phone_data.R b/src/data/streams/pull_phone_data.R index b0ba2244..f94f6a0b 100644 --- a/src/data/streams/pull_phone_data.R +++ b/src/data/streams/pull_phone_data.R @@ -160,7 +160,7 @@ pull_phone_data <- function(){ if(!setequal(expected_columns, colnames(mutated_data))) stop(paste0("The mutated data for ", device, " does not have the columns RAPIDS expects. The mutation script returned [", paste(colnames(mutated_data), collapse=","),"] but RAPIDS expected [",paste(expected_columns, collapse=","), "]. One ore more mutation scripts in [", sensor,"][MUTATION][SCRIPTS] are adding extra columns or removing or not adding the ones expected")) - participant_data <- rbind(participant_data, mutated_data) + participant_data <- rbind(participant_data, mutated_data %>% distinct()) } participant_data <- participant_data %>% arrange(timestamp) diff --git a/src/data/streams/pull_wearable_data.R b/src/data/streams/pull_wearable_data.R index 50184a3b..9f744231 100644 --- a/src/data/streams/pull_wearable_data.R +++ b/src/data/streams/pull_wearable_data.R @@ -111,7 +111,7 @@ pull_wearable_data_main <- function(){ if(!setequal(expected_columns, colnames(mutated_data))) stop(paste0("The mutated data for ", device, " does not have the columns RAPIDS expects. The mutation script returned [", paste(colnames(mutated_data), collapse=","),"] but RAPIDS expected [",paste(expected_columns, collapse=","), "]. One ore more mutation scripts in [", sensor,"][MUTATION][SCRIPTS] are adding extra columns or removing or not adding the ones expected")) - participant_data <- rbind(participant_data, mutated_data) + participant_data <- rbind(participant_data, mutated_data %>% distinct()) } if(device_type == "fitbit") diff --git a/src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py b/src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py new file mode 100644 index 00000000..cdca5c1d --- /dev/null +++ b/src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py @@ -0,0 +1,37 @@ +import pandas as pd +import numpy as np + + +def mergeSleepEpisodes(sleep_data, cols_for_groupby): + sleep_episodes = pd.DataFrame(columns=["type_episode_id", "level_episode_id", "level", "unified_level", "is_main_sleep", "type", "timestamp", "duration"]) + if not sleep_data.empty: + sleep_data = sleep_data.groupby(by=cols_for_groupby) + sleep_episodes = sleep_data[["timestamp"]].first() + sleep_episodes["duration"] = sleep_data["duration"].sum() + + return sleep_episodes + + +sleep_intraday = pd.read_csv(snakemake.input["sleep_intraday"]) + +# discard useless columns +for col in ["device_id", "local_timezone", "local_date_time", "local_date", "local_time", "local_hour", "local_minute", "assigned_segments"]: + del sleep_intraday[col] + +# Extract "unified_level" based on "level" field +# For "classic" type, "unified_level" is one of {0, 1} where 0: awake {"awake" + "restless"}, 1: asleep {"asleep"} +# For "stages" type, "unified_level" is one of {0, 1} where 0: awake {"wake"}, 1: asleep {"deep" + "light" + "rem"} +sleep_intraday["unified_level"] = np.where(sleep_intraday["level"].isin(["awake", "restless", "wake"]), 0, 1) + +# Put consecutive rows with the same "level" field together and merge episodes +sleep_intraday.insert(2, "level_episode_id", (sleep_intraday[["type_episode_id", "level"]] != sleep_intraday[["type_episode_id", "level"]].shift()).any(axis=1).cumsum()) +sleep_intraday_episodes = mergeSleepEpisodes(sleep_intraday, ["type_episode_id", "level_episode_id", "level", "unified_level", "is_main_sleep", "type"]) + + +# Generate "start_timestamp" and "end_timestamp" +sleep_intraday_episodes["end_timestamp"] = sleep_intraday_episodes["timestamp"] + ((sleep_intraday_episodes["duration"] - 1) * 1000) + 999 +sleep_intraday_episodes.rename(columns={"timestamp": "start_timestamp"}, inplace=True) + +del sleep_intraday_episodes["duration"] + +sleep_intraday_episodes.to_csv(snakemake.output[0], index=True)