Merge branch 'feature/fitbit_sleep_intraday' into develop
commit
8992a9c9e2
15
Snakefile
15
Snakefile
|
@ -257,11 +257,16 @@ for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
# for provider in config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"].keys():
|
||||
# if config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_raw.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_parsed.csv", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||
for provider in config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_parsed.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_sleep_intraday.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
|
36
config.yaml
36
config.yaml
|
@ -379,6 +379,42 @@ FITBIT_SLEEP_SUMMARY:
|
|||
SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_summary
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-sleep-intraday/
|
||||
FITBIT_SLEEP_INTRADAY:
|
||||
TABLE: fitbit_data
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES:
|
||||
LEVELS_AND_TYPES_COMBINING_ALL: True
|
||||
LEVELS_AND_TYPES: [countepisode, sumduration, maxduration, minduration, avgduration, medianduration, stdduration]
|
||||
RATIOS_TYPE: [count, duration]
|
||||
RATIOS_SCOPE: [ACROSS_LEVELS, ACROSS_TYPES, WITHIN_LEVELS, WITHIN_TYPES]
|
||||
ROUTINE: [starttimefirstmainsleep, endtimelastmainsleep, starttimefirstnap, endtimelastnap]
|
||||
SLEEP_LEVELS:
|
||||
CLASSIC: [awake, restless, asleep]
|
||||
STAGES: [wake, deep, light, rem]
|
||||
UNIFIED: [awake, asleep]
|
||||
SLEEP_TYPES: [main, nap]
|
||||
INCLUDE_SLEEP_LATER_THAN: 0 # a number ranged from 0 (midnight) to 1439 (23:59)
|
||||
REFERENCE_TIME: MIDNIGHT # chosen from "MIDNIGHT" and "START_OF_THE_SEGMENT"
|
||||
SRC_FOLDER: "rapids" # inside src/features/fitbit_sleep_intraday
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
PRICE:
|
||||
COMPUTE: False
|
||||
FEATURES: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", socialjetlag, meanssdstarttimeofepisodemain, meanssdendtimeofepisodemain, meanssdmidpointofepisodemain, medianssdstarttimeofepisodemain, medianssdendtimeofepisodemain, medianssdmidpointofepisodemain]
|
||||
SLEEP_LEVELS:
|
||||
CLASSIC: [awake, restless, asleep]
|
||||
STAGES: [wake, deep, light, rem]
|
||||
UNIFIED: [awake, asleep]
|
||||
DAY_TYPES: [WEEKEND, WEEK, ALL]
|
||||
GROUP_EPISODES_WITHIN: # by default: today's 6pm to tomorrow's noon
|
||||
START_TIME: 1080 # number of minutes after the midnight (18:00) 18*60
|
||||
LENGTH: 1080 # in minutes (18 hours) 18*60
|
||||
SRC_FOLDER: "price" # inside src/features/fitbit_sleep_intraday
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-steps-summary/
|
||||
FITBIT_STEPS_SUMMARY:
|
||||
TABLE: steps_summary
|
||||
|
|
|
@ -41,6 +41,7 @@ As a tutorial, we will add a new provider for `PHONE_ACCELEROMETER` called `VEGA
|
|||
- Fitbit Heart Rate Summary
|
||||
- Fitbit Heart Rate Intraday
|
||||
- Fitbit Sleep Summary
|
||||
- Fitbit Sleep Intraday
|
||||
- Fitbit Steps Summary
|
||||
- Fitbit Steps Intraday
|
||||
|
||||
|
|
|
@ -0,0 +1,260 @@
|
|||
# Fitbit Sleep Intraday
|
||||
|
||||
Sensor parameters description for `[FITBIT_SLEEP_INTRADAY]`:
|
||||
|
||||
|Key | Description |
|
||||
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|
||||
|`[TABLE]`| Database table name or file path where the sleep intraday data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file.
|
||||
|
||||
|
||||
The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together.
|
||||
|
||||
We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity).
|
||||
|
||||
??? example "Example of the structure of source data with Fitbit’s sleep API Version 1"
|
||||
|
||||
=== "JSON"
|
||||
|
||||
|device_id |fitbit_data |
|
||||
|---------------------------------------- |--------------------------------------------------------- |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep": [{"awakeCount": 2, "awakeDuration": 3, "awakeningsCount": 10, "dateOfSleep": "2020-10-07", "duration": 8100000, "efficiency": 91, "endTime": "2020-10-07T18:10:00.000", "isMainSleep": true, "logId": 14147921940, "minuteData": [{"dateTime": "15:55:00", "value": "3"}, {"dateTime": "15:56:00", "value": "3"}, {"dateTime": "15:57:00", "value": "2"},...], "minutesAfterWakeup": 0, "minutesAsleep": 123, "minutesAwake": 12, "minutesToFallAsleep": 0, "restlessCount": 8, "restlessDuration": 9, "startTime": "2020-10-07T15:55:00.000", "timeInBed": 135}, {"awakeCount": 0, "awakeDuration": 0, "awakeningsCount": 1, "dateOfSleep": "2020-10-07", "duration": 3780000, "efficiency": 100, "endTime": "2020-10-07T10:52:30.000", "isMainSleep": false, "logId": 14144903977, "minuteData": [{"dateTime": "09:49:00", "value": "1"}, {"dateTime": "09:50:00", "value": "1"}, {"dateTime": "09:51:00", "value": "1"},...], "minutesAfterWakeup": 1, "minutesAsleep": 62, "minutesAwake": 0, "minutesToFallAsleep": 0, "restlessCount": 1, "restlessDuration": 1, "startTime": "2020-10-07T09:49:00.000", "timeInBed": 63}], "summary": {"totalMinutesAsleep": 185, "totalSleepRecords": 2, "totalTimeInBed": 198}}
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep": [{"awakeCount": 3, "awakeDuration": 21, "awakeningsCount": 16, "dateOfSleep": "2020-10-08", "duration": 19260000, "efficiency": 89, "endTime": "2020-10-08T06:01:30.000", "isMainSleep": true, "logId": 14150613895, "minuteData": [{"dateTime": "00:40:00", "value": "3"}, {"dateTime": "00:41:00", "value": "3"}, {"dateTime": "00:42:00", "value": "3"},...], "minutesAfterWakeup": 0, "minutesAsleep": 275, "minutesAwake": 33, "minutesToFallAsleep": 0, "restlessCount": 13, "restlessDuration": 25, "startTime": "2020-10-08T00:40:00.000", "timeInBed": 321}], "summary": {"totalMinutesAsleep": 275, "totalSleepRecords": 1, "totalTimeInBed": 321}}
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep": [{"awakeCount": 1, "awakeDuration": 3, "awakeningsCount": 8, "dateOfSleep": "2020-10-09", "duration": 19320000, "efficiency": 96, "endTime": "2020-10-09T05:57:30.000", "isMainSleep": true, "logId": 14161136803, "minuteData": [{"dateTime": "00:35:30", "value": "2"}, {"dateTime": "00:36:30", "value": "1"}, {"dateTime": "00:37:30", "value": "1"},...], "minutesAfterWakeup": 0, "minutesAsleep": 309, "minutesAwake": 13, "minutesToFallAsleep": 0, "restlessCount": 7, "restlessDuration": 10, "startTime": "2020-10-09T00:35:30.000", "timeInBed": 322}], "summary": {"totalMinutesAsleep": 309, "totalSleepRecords": 1, "totalTimeInBed": 322}}
|
||||
|
||||
=== "PLAIN_TEXT"
|
||||
|
||||
All columns are mandatory; however, all except `device_id`, `local_date_time` and `duration` can be empty if you don't have that data. Just keep in mind that some features might be inaccurate or empty, as `type_episode_id`, `level`, `is_main_sleep`, and `type` are used for sleep episode extraction. `type_episode_id` is based on where a row is extracted from: if it is extracted from the 1st "minuteData" block, the `type_episode_id` field will be 0. Similarly, rows from the kth block will be assigned k-1. In practice, you only need to make sure that rows extracted from the same "minuteData" block are assigned the same unique `type_episode_id` value.
|
||||
|
||||
|device_id |type_episode_id |local_date_time |duration |level |is_main_sleep |type |
|
||||
|------------------------------------ |---------------- |------------------- |--------- |---------- |-------------- |-------------- |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-07 15:55:00 |60 |awake |0 |classic |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-07 15:56:00 |60 |awake |0 |classic |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-07 15:57:00 |60 |restless |0 |classic |
|
||||
|
||||
??? example "Example of the structure of source data with Fitbit’s sleep API Version 1.2"
|
||||
|
||||
=== "JSON"
|
||||
|
||||
|device_id |fitbit_data |
|
||||
|---------------------------------------- |--------------------------------------------------------- |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}}
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}}
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}}
|
||||
|
||||
=== "PLAIN_TEXT"
|
||||
|
||||
All columns are mandatory; however, all except `device_id`, `local_date_time` and `duration` can be empty if you don't have that data. Just keep in mind that some features might be inaccurate or empty, as `type_episode_id`, `level`, `is_main_sleep`, and `type` are used for sleep episode extraction. `type_episode_id` is based on where a row is extracted from: if it is extracted from the 1st "data" and "shortData" block, the `type_episode_id` field will be 0. Similarly, rows from the kth block will be assigned k-1. In practice, you only need to make sure that rows extracted from the same "data" and "shortData" block are assigned the same unique `type_episode_id` value.
|
||||
|
||||
|device_id |type_episode_id |local_date_time |duration |level |is_main_sleep |type |
|
||||
|------------------------------------ |---------------- |------------------- |--------- |---------- |-------------- |-------------- |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:36:30 |60 |restless |0 |classic |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:37:30 |660 |asleep |0 |classic |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:48:30 |60 |restless |0 |classic |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |... |... |... |... |... |... |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |1 |2020-10-10 01:46:30 |420 |light |1 |stages |
|
||||
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |1 |2020-10-10 01:53:30 |1230 |deep |1 |stages |
|
||||
|
||||
## RAPIDS provider
|
||||
|
||||
!!! info "Available time segments"
|
||||
- Available for all time segments
|
||||
|
||||
!!! info "File Sequence"
|
||||
```bash
|
||||
- data/raw/{pid}/fitbit_sleep_intraday_raw.csv
|
||||
- data/raw/{pid}/fitbit_sleep_intraday_parsed.csv
|
||||
- data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv
|
||||
- data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv
|
||||
- data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_{language}_{provider_key}.csv
|
||||
- data/processed/features/{pid}/fitbit_sleep_intraday.csv
|
||||
```
|
||||
|
||||
|
||||
Parameters description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS]`:
|
||||
|
||||
|Key | Description |
|
||||
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|
||||
|`[COMPUTE]` | Set to `True` to extract `FITBIT_SLEEP_INTRADAY` features from the `RAPIDS` provider|
|
||||
|`[FEATURES]` | Features to be computed from sleep intraday data, see table below |
|
||||
|`[SLEEP_LEVELS]` | Fitbit’s sleep API Version 1 only provides `CLASSIC` records. However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS grouped them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`).
|
||||
|`[SLEEP_TYPES]` | Types of sleep to be included in the feature extraction computation. Fitbit provides 2 types of sleep: `main`, `nap`.
|
||||
|`[INCLUDE_SLEEP_LATER_THAN]`| All resampled sleep rows (bin interval: one minute) that started after this time will be included in the feature computation. It is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. If a segment is longer than one day, this value is for every day.
|
||||
|`[REFERENCE_TIME]`| The reference point from which the `[ROUTINE]` features are to be computed. Chosen from `MIDNIGHT` and `START_OF_THE_SEGMENT`, default is `MIDNIGHT`. If you have multiple time segments per day it might be more informative to set this flag to `START_OF_THE_SEGMENT`.
|
||||
|
||||
|
||||
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][LEVELS_AND_TYPES]`:
|
||||
|
||||
|Feature |Units |Description |
|
||||
|------------------------------- |-------------- |-------------------------------------------------------------|
|
||||
|countepisode`[LEVEL][TYPE]` |episodes |Number of `[LEVEL][TYPE]` sleep episodes. `[LEVEL]` is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]` can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|
||||
|sumduration`[LEVEL][TYPE]` |minutes |Total duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|
||||
|maxduration`[LEVEL][TYPE]` |minutes | Longest duration of any `[LEVEL][TYPE]`sleep episode. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|
||||
|minduration`[LEVEL][TYPE]` |minutes | Shortest duration of any `[LEVEL][TYPE]`sleep episode. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|
||||
|avgduration`[LEVEL][TYPE]` |minutes | Average duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|
||||
|medianduration`[LEVEL][TYPE]` |minutes | Median duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|
||||
|stdduration`[LEVEL][TYPE]` |minutes | Standard deviation duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|
||||
|
||||
|
||||
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS]` RATIOS `[ACROSS_LEVELS]`:
|
||||
|
||||
|Feature |Units |Description |
|
||||
|-------------------------- |-------------- |-------------------------------------------------------------|
|
||||
|ratiocount`[LEVEL]` |-|Ratio between the **count** of episodes of a single sleep `[LEVEL]` and the **count** of all episodes of all levels during both `main` and `nap` sleep types. This answers the question: what percentage of all `wake`, `deep`, `light`, and `rem` episodes were `rem`? (e.g., $countepisode[remstages][all] / countepisode[all][all]$)
|
||||
|ratioduration`[LEVEL]` |-|Ratio between the **duration** of episodes of a single sleep `[LEVEL]` and the **duration** of all episodes of all levels during both `main` and `nap` sleep types. This answers the question: what percentage of all `wake`, `deep`, `light`, and `rem` time was `rem`? (e.g., $sumduration[remstages][all] / sumduration[all][all]$)
|
||||
|
||||
|
||||
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS]` RATIOS `[ACROSS_TYPES]`:
|
||||
|
||||
|Feature |Units |Description |
|
||||
|-------------------------- |-------------- |-------------------------------------------------------------|
|
||||
|ratiocountmain |- |Ratio between the **count** of all `main` episodes (independently of the levels inside) divided by the **count** of all `main` and `nap` episodes. This answers the question: what percentage of all sleep episodes (`main` and `nap`) were `main`? We do not provide the ratio for `nap` because it is complementary. ($countepisode[all][main] / countepisode[all][all]$)
|
||||
|ratiodurationmain |- |Ratio between the **duration** of all `main` episodes (independently of the levels inside) divided by the **duration** of all `main` and `nap` episodes. This answers the question: what percentage of all sleep time (`main` and `nap`) was `main`? We do not provide the ratio for `nap` because it is complementary. ($sumduration[all][main] / sumduration[all][all]$)
|
||||
|
||||
|
||||
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS]` RATIOS `[WITHIN_LEVELS]`:
|
||||
|
||||
|Feature |Units |Description |
|
||||
|--------------------------------- |-------------- |-------------------------------------------------------------|
|
||||
|ratiocount`[TYPE]`within`[LEVEL]` |- |Ratio between the **count** of episodes of a single sleep `[LEVEL]` during `main` sleep divided by the **count** of episodes of a single sleep `[LEVEL]` during `main` **and** `nap`. This answers the question: are `rem` episodes more frequent during `main` than `nap` sleep? We do not provide the ratio for `nap` because it is complementary. ($countepisode[remstages][main] / countepisode[remstages][all]$)
|
||||
|ratioduration`[TYPE]`within`[LEVEL]` |- |Ratio between the **duration** of episodes of a single sleep `[LEVEL]` during `main` sleep divided by the **duration** of episodes of a single sleep `[LEVEL]` during `main` **and** `nap`. This answers the question: is `rem` time more frequent during `main` than `nap` sleep? We do not provide the ratio for `nap` because it is complementary. ($sumduration[remstages][main] / sumduration[remstages][all]$)
|
||||
|
||||
|
||||
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS]` RATIOS `[WITHIN_TYPES]`:
|
||||
|
||||
|Feature |Units|Description|
|
||||
| - |- | - |
|
||||
|ratiocount`[LEVEL]`within`[TYPE]` |-|Ratio between the **count** of episodes of a single sleep `[LEVEL]` and the **count** of all episodes of all levels during either `main` or `nap` sleep types. This answers the question: what percentage of all `wake`, `deep`, `light`, and `rem` episodes were `rem` during `main`/`nap` sleep time? (e.g., $countepisode[remstages][main] / countepisode[all][main]$)
|
||||
|ratioduration`[LEVEL]`within`[TYPE]` |-|Ratio between the **duration** of episodes of a single sleep `[LEVEL]` and the **duration** of all episodes of all levels during either `main` or `nap` sleep types. This answers the question: what percentage of all `wake`, `deep`, `light`, and `rem` time was `rem` during `main`/`nap` sleep time? (e.g., $sumduration[remstages][main] / sumduration[all][main]$)
|
||||
|
||||
|
||||
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][ROUTINE]`:
|
||||
|
||||
|Feature |Units |Description |
|
||||
|--------------------------------- |-------------- |-------------------------------------------------------------|
|
||||
|starttimefirstmainsleep |minutes |Start time (in minutes since `REFERENCE_TIME`) of the first main sleep episode after `INCLUDE_SLEEP_LATER_THAN`.
|
||||
|endtimelastmainsleep |minutes |End time (in minutes since `REFERENCE_TIME`) of the last main sleep episode after `INCLUDE_SLEEP_LATER_THAN`.
|
||||
|starttimefirstnap |minutes |Start time (in minutes since `REFERENCE_TIME`) of the first nap episode after `INCLUDE_SLEEP_LATER_THAN`.
|
||||
|endtimelastnap |minutes |End time (in minutes since `REFERENCE_TIME`) of the last nap episode after `INCLUDE_SLEEP_LATER_THAN`.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
!!! note "Assumptions/Observations"
|
||||
1. Deleting values from `[SLEEP_LEVELS]` or `[SLEEP_TYPES]` will only change the features you receive from `[LEVELS_AND_TYPES]`. For example if `STAGES` only contains `[rem, light]` you will not receive `countepisode[wake|deep][TYPE]` or sum, max, min, avg, median, or std `duration`. These values will not influence `RATIOS` or `ROUTINE` features.
|
||||
2. Any `[LEVEL]` grouping is done within the elements of each class `CLASSIC`, `STAGES`, and `UNIFIED`. That is, we never combine `CLASSIC` or `STAGES` types to compute features when `LEVELS_AND_TYPES_COMBINING_ALL` is True or when computing `RATIOS`.
|
||||
|
||||
|
||||
## PRICE provider
|
||||
|
||||
!!! info "Available time segments"
|
||||
- Available for any time segment longer than or equal to one day
|
||||
|
||||
!!! info "File Sequence"
|
||||
```bash
|
||||
- data/raw/{pid}/fitbit_sleep_intraday_raw.csv
|
||||
- data/raw/{pid}/fitbit_sleep_intraday_parsed.csv
|
||||
- data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv
|
||||
- data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv
|
||||
- data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_{language}_{provider_key}.csv
|
||||
- data/processed/features/{pid}/fitbit_sleep_intraday.csv
|
||||
```
|
||||
|
||||
|
||||
Parameters description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE]`:
|
||||
|
||||
|Key | Description |
|
||||
|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
|`[COMPUTE]` | Set to `True` to extract `FITBIT_SLEEP_INTRADAY` features from the `PRICE` provider |
|
||||
|`[FEATURES]` | Features to be computed from sleep intraday data, see table below
|
||||
|`[SLEEP_LEVELS]` | Fitbit’s sleep API Version 1 only provides `CLASSIC` records. However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS grouped them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`).
|
||||
|`[DAY_TYPES]` | The features of this provider can be computed using daily averages/standard deviations that were extracted on `WEEKEND` days only, `WEEK` days only, or `ALL` days|
|
||||
|`[GROUP_EPISODES_WITHIN]` | This parameter contains 2 values: `[START_TIME]` and `[LENGTH]`. Only `main` sleep episodes that intersect or contain the period between [`START_TIME`, `START_TIME` + `LENGTH`] are taken into account to compute the features described below. Both `[START_TIME]` and `[LENGTH]` are in minutes. `[START_TIME]` is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. `[LENGTH]` is a number smaller than 1440 (24 hours). |
|
||||
|
||||
|
||||
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE]`:
|
||||
|
||||
|Feature |Units |Description |
|
||||
|------------------------------------- |----------------- |-------------------------------------------------------------|
|
||||
|avgduration`[LEVEL]`main`[DAY_TYPE]` |minutes | Average duration of daily `LEVEL` sleep episodes. You can include daily averages that were computed on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|
||||
|avgratioduration`[LEVEL]`withinmain`[DAY_TYPE]` |- | Average ratio between daily `LEVEL` time and in-bed time inferred from `main` sleep episodes. `LEVEL` is one of `SLEEP_LEVELS` (e.g. awake-classic or rem-stages). In-bed time is the total duration of all `main` sleep episodes for each day. You can include daily ratios that were computed on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|
||||
|avgstarttimeofepisodemain`[DAY_TYPE]` |minutes | Average start time of the first `main` sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|
||||
|avgendtimeofepisodemain`[DAY_TYPE]` |minutes | Average end time of the last `main` sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|
||||
|avgmidpointofepisodemain`[DAY_TYPE]` |minutes | Average mid time between the start of the first `main` sleep episode and the end of the last `main` sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|
||||
|stdstarttimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of start time of the first `main` sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|
||||
|stdendtimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of end time of the last `main` sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|
||||
|stdmidpointofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of mid time between the start of the first `main` sleep episode and the end of the last `main` sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|
||||
|socialjetlag |minutes | Difference in minutes between the avgstarttimeofepisodemain (bed time) of weekends and weekdays.
|
||||
|meanssdstarttimeofepisodemain |minutes squared | Same as `avgstarttimeofepisodemain[DAY_TYPE]` but the average is computed over the squared differences of each pair of consecutive start times.
|
||||
|meanssdendtimeofepisodemain |minutes squared | Same as `avgendtimeofepisodemain[DAY_TYPE]` but the average is computed over the squared differences of each pair of consecutive end times.
|
||||
|meanssdmidpointofepisodemain |minutes squared | Same as `avgmidpointofepisodemain[DAY_TYPE]` but the average is computed over the squared differences of each pair of consecutive mid times.
|
||||
|medianssdstarttimeofepisodemain |minutes squared | Same as `avgstarttimeofepisodemain[DAY_TYPE]` but the median is computed over the squared differences of each pair of consecutive start times.
|
||||
|medianssdendtimeofepisodemain |minutes squared | Same as `avgendtimeofepisodemain[DAY_TYPE]` but the median is computed over the squared differences of each pair of consecutive end times.
|
||||
|medianssdmidpointofepisodemain |minutes squared | Same as `avgmidpointofepisodemain[DAY_TYPE]` but the median is computed over the squared differences of each pair of consecutive mid times.
|
||||
|
||||
|
||||
|
||||
!!! note "Assumptions/Observations"
|
||||
1. These features are based on descriptive statistics computed across daily values (start/end/mid times of sleep episodes). This is the reason why they are only available on time segments that are longer than 24 hours (we need at least 1 day to get the average).
|
||||
2. Even though Fitbit provides 2 types of sleep episodes (`main` and `nap`), only `main` sleep episodes are considered.
|
||||
3. How do we assign sleep episodes to specific dates?
|
||||
|
||||
`START_TIME` and `LENGTH` control the dates that sleep episodes belong to. For a pair of `[START_TIME]` and `[LENGTH]`, sleep episodes (blue boxes) can only be placed at the following places:
|
||||
|
||||
<figure>
|
||||
<img src="../../img/features_fitbit_sleep_intraday.png" max-width="100%" />
|
||||
<figcaption>Relationship between sleep episodes and the given times`([START_TIME], [LENGTH])`</figcaption>
|
||||
</figure>
|
||||
|
||||
- If the end time of a sleep episode is before `[START_TIME]`, it will belong to the day before its start date (e.g. sleep episode #1).
|
||||
|
||||
- If (1) the start time or the end time of a sleep episode is between `[START_TIME]` and `[START_TIME] + [LENGTH]` (the episode overlaps the period), or (2) the start time is before `[START_TIME]` and the end time is after `[START_TIME] + [LENGTH]` (the episode contains the period), it will belong to its start date (e.g. sleep episodes #2, #3, #4, #5).
|
||||
|
||||
- If the start time of a sleep episode is after `[START_TIME] + [LENGTH]`, it will belong to the day after its start date (e.g. sleep episode #6).
|
||||
|
||||
Only `main` sleep episodes that intersect or contain the period between `[START_TIME]` and `[START_TIME] + [LENGTH]` will be included in the feature computation. If we process the following `main` sleep episodes:
|
||||
|
||||
| episode |start|end|
|
||||
|-|-|-|
|
||||
|1|2021-02-01 12:00|2021-02-01 15:00|
|
||||
|2|2021-02-01 21:00|2021-02-02 03:00|02-01
|
||||
|3|2021-02-02 05:00|2021-02-02 08:00|02-01
|
||||
|4|2021-02-02 11:00|2021-02-02 14:00|
|
||||
|5|2021-02-02 19:00|2021-02-03 06:00|02-02
|
||||
|
||||
And our parameters:
|
||||
|
||||
- `[GROUP_EPISODES_WITHIN][START_TIME]` = 1320 (today's 22:00)
|
||||
|
||||
- `[GROUP_EPISODES_WITHIN][LENGTH]` = 720 (tomorrow's 10:00, or 22:00 + 12 hours)
|
||||
|
||||
Only sleep episodes 2, 3, and 5 would be considered.
|
||||
|
||||
4. Time related features represent the number of minutes between the start/end/midpoint of sleep episodes and the assigned day's midnight.
|
||||
|
||||
5. All `main` sleep episodes are chunked within the requested [time segments](../../setup/configuration/#time-segments), which need to be at least 24 hours long (1, 2, 3, 7 days, etc.). Then, daily features will be extracted and averaged across the length of the time segment, for example:
|
||||
|
||||
The daily features extracted on 2021-02-01 will be:
|
||||
|
||||
- starttimeofepisodemain (bedtime) is `21 * 60` (episode 2 start time 2021-02-01 21:00)
|
||||
|
||||
- endtimeofepisodemain (wake time) is `32 * 60` (episode 3 end time 2021-02-02 08:00 + 24 hours)
|
||||
|
||||
- midpointofepisodemain (midpoint sleep) is `[(21 * 60) + (32 * 60)] / 2`
|
||||
|
||||
|
||||
The daily features extracted on 2021-02-02 will be:
|
||||
|
||||
- starttimeofepisodemain (bedtime) is `19 * 60` (episode 5 start time 2021-02-02 19:00)
|
||||
|
||||
- endtimeofepisodemain (wake time) is `30 * 60` (episode 5 end time 2021-02-03 06:00 + 24 hours)
|
||||
|
||||
- midpointofepisodemain (midpoint sleep) is `[(19 * 60) + (30 * 60)] / 2`
|
||||
|
||||
And `avgstarttimeofepisodemain[DAY_TYPE]` will be `([21 * 60] + [19 * 60]) / 2`
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Binary file not shown.
After Width: | Height: | Size: 76 KiB |
|
@ -26,3 +26,7 @@ div[data-md-component=announce]>div#announce-msg>a{
|
|||
color: var(--md-typeset-a-color);
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.md-typeset table:not([class]) th {
|
||||
min-width: 0rem;
|
||||
}
|
|
@ -105,6 +105,7 @@ nav:
|
|||
- Fitbit Heart Rate Summary: features/fitbit-heartrate-summary.md
|
||||
- Fitbit Heart Rate Intraday: features/fitbit-heartrate-intraday.md
|
||||
- Fitbit Sleep Summary: features/fitbit-sleep-summary.md
|
||||
- Fitbit Sleep Intraday: features/fitbit-sleep-intraday.md
|
||||
- Fitbit Steps Summary: features/fitbit-steps-summary.md
|
||||
- Fitbit Steps Intraday: features/fitbit-steps-intraday.md
|
||||
- Empatica:
|
||||
|
|
|
@ -660,6 +660,40 @@ rule fitbit_sleep_summary_r_features:
|
|||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule resample_sleep_episodes:
|
||||
input:
|
||||
"data/raw/{pid}/fitbit_sleep_intraday_parsed.csv"
|
||||
output:
|
||||
"data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv"
|
||||
script:
|
||||
"../src/features/utils/resample_episodes.R"
|
||||
|
||||
rule fitbit_sleep_intraday_python_features:
|
||||
input:
|
||||
sensor_data = "data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv",
|
||||
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "fitbit_sleep_intraday"
|
||||
output:
|
||||
"data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule fitbit_sleep_intraday_r_features:
|
||||
input:
|
||||
sensor_data = "data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv",
|
||||
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "fitbit_sleep_intraday"
|
||||
output:
|
||||
"data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule merge_sensor_features_for_individual_participants:
|
||||
input:
|
||||
feature_files = input_merge_sensor_features_for_individual_participants
|
||||
|
|
|
@ -14,66 +14,53 @@ SLEEP_SUMMARY_COLUMNS_V1_2 = ("device_id", "efficiency",
|
|||
"timestamp")
|
||||
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless")
|
||||
|
||||
SLEEP_INTRADAY_COLUMNS = ("device_id",
|
||||
SLEEP_INTRADAY_COLUMNS = (# Extract "type_episode_id" field based on summary data: start from 0
|
||||
"type_episode_id",
|
||||
"duration",
|
||||
# For "classic" type, original_level is one of {"awake", "restless", "asleep"}
|
||||
# For "stages" type, original_level is one of {"wake", "deep", "light", "rem"}
|
||||
"level",
|
||||
# For "classic" type, unified_level is one of {0, 1} where 0: awake {"awake" + "restless"}, 1: asleep {"asleep"}
|
||||
# For "stages" type, unified_level is one of {0, 1} where 0: awake {"wake"}, 1: asleep {"deep" + "light" + "rem"}
|
||||
"unified_level",
|
||||
# one of {0, 1} where 0: nap, 1: main sleep
|
||||
# One of {0, 1} where 0: nap, 1: main sleep
|
||||
"is_main_sleep",
|
||||
# one of {"classic", "stages"}
|
||||
# One of {"classic", "stages"}
|
||||
"type",
|
||||
"local_date_time",
|
||||
"timestamp")
|
||||
"start_timestamp",
|
||||
"end_timestamp")
|
||||
|
||||
def mergeLongAndShortData(data_summary):
|
||||
longData = pd.DataFrame(columns=['dateTime', 'level', 'seconds'])
|
||||
shortData = pd.DataFrame(columns=['dateTime','level', 'seconds'])
|
||||
|
||||
windowLength = 30
|
||||
def mergeLongAndShortData(data_intraday):
|
||||
long_data = pd.DataFrame(columns=["dateTime", "level"])
|
||||
short_data = pd.DataFrame(columns=["dateTime", "level"])
|
||||
|
||||
for data in data_summary['data']:
|
||||
origEntry = data
|
||||
window_length = 30
|
||||
|
||||
for data in data_intraday["data"]:
|
||||
counter = 0
|
||||
numberOfSplits = origEntry['seconds']//windowLength
|
||||
for times in range(numberOfSplits):
|
||||
newRow = {'dateTime':dateutil.parser.parse(origEntry['dateTime'])+timedelta(seconds=counter*windowLength),'level':origEntry['level'],'seconds':windowLength}
|
||||
longData = longData.append(newRow, ignore_index = True)
|
||||
for times in range(data["seconds"] // window_length):
|
||||
row = {"dateTime": dateutil.parser.parse(data["dateTime"])+timedelta(seconds=counter*window_length), "level": data["level"]}
|
||||
long_data = long_data.append(row, ignore_index = True)
|
||||
counter = counter + 1
|
||||
|
||||
for data in data_summary['shortData']:
|
||||
origEntry = data
|
||||
for data in data_intraday["shortData"]:
|
||||
counter = 0
|
||||
numberOfSplits = origEntry['seconds']//windowLength
|
||||
for times in range(numberOfSplits):
|
||||
newRow = {'dateTime':dateutil.parser.parse(origEntry['dateTime'])+timedelta(seconds=counter*windowLength),'level':origEntry['level'],'seconds':windowLength}
|
||||
shortData = shortData.append(newRow,ignore_index = True)
|
||||
for times in range(data["seconds"] // window_length):
|
||||
row = {"dateTime": dateutil.parser.parse(data["dateTime"])+timedelta(seconds=counter*window_length), "level": data["level"]}
|
||||
short_data = short_data.append(row, ignore_index = True)
|
||||
counter = counter + 1
|
||||
longData.set_index('dateTime',inplace=True)
|
||||
shortData.set_index('dateTime',inplace=True)
|
||||
longData['level'] = np.where(longData.index.isin(shortData.index) == True,'wake',longData['level'])
|
||||
long_data.set_index("dateTime",inplace=True)
|
||||
short_data.set_index("dateTime",inplace=True)
|
||||
long_data["level"] = np.where(long_data.index.isin(short_data.index) == True, "wake", long_data["level"])
|
||||
|
||||
longData.reset_index(inplace=True)
|
||||
long_data.reset_index(inplace=True)
|
||||
|
||||
return longData.values.tolist()
|
||||
|
||||
def classicData1min(data_summary):
|
||||
dataList = list()
|
||||
for data in data_summary['data']:
|
||||
origEntry = data
|
||||
counter = 0
|
||||
timeDuration = 60
|
||||
numberOfSplits = origEntry['seconds']//timeDuration
|
||||
for times in range(numberOfSplits):
|
||||
newRow = {'dateTime':dateutil.parser.parse(origEntry['dateTime'])+timedelta(seconds=counter*timeDuration),'level':origEntry['level'],'seconds':timeDuration}
|
||||
dataList.append(newRow)
|
||||
counter = counter + 1
|
||||
return dataList
|
||||
return long_data.values.tolist()
|
||||
|
||||
# Parse one record for sleep API version 1
|
||||
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
|
||||
def parseOneRecordForV1(record, device_id, type_episode_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
|
||||
|
||||
sleep_record_type = "classic"
|
||||
|
||||
|
@ -110,16 +97,16 @@ def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, rec
|
|||
d_original_level = SLEEP_CODE2LEVEL[int(data["value"])-1]
|
||||
|
||||
|
||||
row_intraday = (device_id,
|
||||
row_intraday = (type_episode_id, 60,
|
||||
d_original_level, -1, d_is_main_sleep, sleep_record_type,
|
||||
d_datetime, 0)
|
||||
d_datetime, 0, 0)
|
||||
|
||||
records_intraday.append(row_intraday)
|
||||
|
||||
return records_summary, records_intraday
|
||||
|
||||
# Parse one record for sleep API version 1.2
|
||||
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
|
||||
def parseOneRecordForV12(record, device_id, type_episode_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
|
||||
|
||||
sleep_record_type = record['type']
|
||||
|
||||
|
@ -138,53 +125,25 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
|
|||
|
||||
# Intraday data
|
||||
if fitbit_data_type == "intraday":
|
||||
if sleep_record_type == 'classic':
|
||||
start_date = d_start_datetime.date()
|
||||
end_date = d_end_datetime.date()
|
||||
is_before_midnight = True
|
||||
curr_date = start_date
|
||||
data_summary = record['levels']
|
||||
dataSplitted = classicData1min(data_summary) ##Calling the function to split the data in regular 60 seconds interval
|
||||
for data in dataSplitted:
|
||||
# For overnight episodes, use end_date once we are over midnight
|
||||
d_time = data["dateTime"].time()
|
||||
if is_before_midnight and d_time.hour == 0:
|
||||
curr_date = end_date
|
||||
d_datetime = datetime.combine(curr_date, d_time)
|
||||
if sleep_record_type == "classic":
|
||||
for data in record["levels"]["data"]:
|
||||
d_datetime = dateutil.parser.parse(data["dateTime"])
|
||||
|
||||
d_original_level = data["level"]
|
||||
|
||||
row_intraday = (device_id,
|
||||
d_original_level, -1, d_is_main_sleep, sleep_record_type,
|
||||
d_datetime, 0)
|
||||
row_intraday = (type_episode_id, data["seconds"],
|
||||
data["level"], -1, d_is_main_sleep, sleep_record_type,
|
||||
d_datetime, 0, 0)
|
||||
records_intraday.append(row_intraday)
|
||||
else:
|
||||
# For sleep type "stages"
|
||||
start_date = d_start_datetime.date()
|
||||
end_date = d_end_datetime.date()
|
||||
is_before_midnight = True
|
||||
curr_date = start_date
|
||||
data_summary = record['levels']
|
||||
dataList = mergeLongAndShortData(data_summary)
|
||||
for data in dataList:
|
||||
|
||||
d_time = data[0].time()
|
||||
if is_before_midnight and d_time.hour == 0:
|
||||
curr_date = end_date
|
||||
d_datetime = datetime.combine(curr_date, d_time)
|
||||
|
||||
d_original_level = data[1]
|
||||
|
||||
row_intraday = (device_id,
|
||||
d_original_level, -1, d_is_main_sleep, sleep_record_type,
|
||||
d_datetime, 0)
|
||||
for data in mergeLongAndShortData(record["levels"]):
|
||||
row_intraday = (type_episode_id, 30,
|
||||
data[1], -1, d_is_main_sleep, sleep_record_type,
|
||||
data[0], 0, 0)
|
||||
|
||||
records_intraday.append(row_intraday)
|
||||
|
||||
return records_summary, records_intraday
|
||||
|
||||
|
||||
|
||||
def parseSleepData(sleep_data, fitbit_data_type):
|
||||
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
|
||||
if sleep_data.empty:
|
||||
|
@ -194,6 +153,7 @@ def parseSleepData(sleep_data, fitbit_data_type):
|
|||
return pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)
|
||||
device_id = sleep_data["device_id"].iloc[0]
|
||||
records_summary, records_intraday = [], []
|
||||
type_episode_id = 0
|
||||
# Parse JSON into individual records
|
||||
for multi_record in sleep_data.fitbit_data:
|
||||
for record in json.loads(multi_record)["sleep"]:
|
||||
|
@ -203,11 +163,13 @@ def parseSleepData(sleep_data, fitbit_data_type):
|
|||
# For sleep API version 1
|
||||
if "awakeCount" in record:
|
||||
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1
|
||||
records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
|
||||
records_summary, records_intraday = parseOneRecordForV1(record, device_id, type_episode_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
|
||||
# For sleep API version 1.2
|
||||
else:
|
||||
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
|
||||
records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
|
||||
records_summary, records_intraday = parseOneRecordForV12(record, device_id, type_episode_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type)
|
||||
|
||||
type_episode_id = type_episode_id + 1
|
||||
|
||||
if fitbit_data_type == "summary":
|
||||
parsed_data = pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS)
|
||||
|
@ -216,6 +178,19 @@ def parseSleepData(sleep_data, fitbit_data_type):
|
|||
|
||||
return parsed_data
|
||||
|
||||
def mergeSleepEpisodes(sleep_data, cols_for_groupby):
    """Collapse rows that share the same grouping key into a single episode.

    For every group defined by ``cols_for_groupby`` the result keeps the
    ``start_timestamp`` of the group's first row and the ``end_timestamp`` of
    its last row, so the caller is expected to pass rows already sorted in
    time. The grouping columns are returned as regular columns.

    An empty ``sleep_data`` yields an empty frame with the episode columns.
    """
    if sleep_data.empty:
        return pd.DataFrame(columns=["type_episode_id", "level_episode_id", "level", "unified_level", "is_main_sleep", "type", "start_timestamp", "end_timestamp"])

    grouped = sleep_data.groupby(by=cols_for_groupby)

    # First row of each group opens the episode, last row closes it
    episodes = grouped[["start_timestamp"]].first()
    episodes["end_timestamp"] = grouped["end_timestamp"].last()

    # Move the grouping keys from the index back into columns
    return episodes.reset_index(drop=False)
|
||||
|
||||
|
||||
|
||||
timezone = snakemake.params["timezone"]
|
||||
column_format = snakemake.params["column_format"]
|
||||
fitbit_data_type = snakemake.params["fitbit_data_type"]
|
||||
|
@ -237,6 +212,9 @@ elif column_format == "PLAIN_TEXT":
|
|||
else:
|
||||
raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].")
|
||||
|
||||
# Drop duplicates
|
||||
parsed_data.drop_duplicates(inplace=True)
|
||||
|
||||
if parsed_data.shape[0] > 0 and fitbit_data_type == "summary":
|
||||
if sleep_episode_timestamp != "start" and sleep_episode_timestamp != "end":
|
||||
raise ValueError("SLEEP_EPISODE_TIMESTAMP can only be one of ['start', 'end'].")
|
||||
|
@ -245,6 +223,10 @@ if parsed_data.shape[0] > 0 and fitbit_data_type == "summary":
|
|||
|
||||
if not pd.isnull(local_start_date) and not pd.isnull(local_end_date):
|
||||
parsed_data = parsed_data.loc[(parsed_data[datetime_column] >= local_start_date) & (parsed_data[datetime_column] < local_end_date)]
|
||||
|
||||
# Sort by "local_start_date_time" column
|
||||
parsed_data.sort_values(by="local_start_date_time", ascending=True, inplace=True)
|
||||
|
||||
parsed_data["timestamp"] = parsed_data[datetime_column].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6
|
||||
parsed_data.dropna(subset=['timestamp'], inplace=True)
|
||||
parsed_data.drop(["local_start_date_time", "local_end_date_time"], axis = 1, inplace=True)
|
||||
|
@ -252,8 +234,18 @@ if parsed_data.shape[0] > 0 and fitbit_data_type == "summary":
|
|||
if parsed_data.shape[0] > 0 and fitbit_data_type == "intraday":
|
||||
if not pd.isnull(local_start_date) and not pd.isnull(local_end_date):
|
||||
parsed_data = parsed_data.loc[(parsed_data["local_date_time"] >= local_start_date) & (parsed_data["local_date_time"] < local_end_date)]
|
||||
parsed_data["timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6
|
||||
parsed_data.dropna(subset=['timestamp'], inplace=True)
|
||||
parsed_data["unified_level"] = np.where(parsed_data["level"].isin(["awake", "wake", "restless"]), 0, 1)
|
||||
|
||||
# Sort by "local_date_time" column
|
||||
parsed_data.sort_values(by="local_date_time", ascending=True, inplace=True)
|
||||
|
||||
parsed_data["start_timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6
|
||||
parsed_data.dropna(subset=['start_timestamp'], inplace=True)
|
||||
parsed_data["end_timestamp"] = parsed_data["start_timestamp"] + ((parsed_data["duration"] - 1) * 1000) + 999
|
||||
parsed_data["unified_level"] = np.where(parsed_data["level"].isin(["awake", "restless", "wake"]), 0, 1)
|
||||
|
||||
# Put consecutive rows with the same "level" field together and merge episodes
|
||||
parsed_data.insert(2, "level_episode_id", (parsed_data[["type_episode_id", "level"]] != parsed_data[["type_episode_id", "level"]].shift()).any(axis=1).cumsum())
|
||||
parsed_data = mergeSleepEpisodes(parsed_data, ["type_episode_id", "level_episode_id", "level", "unified_level", "is_main_sleep", "type"])
|
||||
|
||||
|
||||
parsed_data.to_csv(snakemake.output[0], index=False)
|
||||
|
|
|
@ -0,0 +1,245 @@
|
|||
import pandas as pd
|
||||
import itertools
|
||||
|
||||
|
||||
|
||||
def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute):
    """Build the list of output column names for the requested features.

    - ``avgduration`` / ``avgratioduration`` expand to one name per
      (sleep level, day type) pair.
    - avg/std start, end and midpoint features expand to one name per day type.
    - Any other feature name is used as is.

    The list always starts with ``"local_segment"``.
    """
    # Each level is suffixed with its lowercased group, e.g. "rem" + "stages"
    levels_with_group = [
        level + group.lower()
        for group in sleep_levels_to_compute
        for level in sleep_levels_to_compute[group]
    ]
    day_suffixes = [day_type.lower() for day_type in day_types_to_compute]
    per_day_type_features = ("avgstarttimeofepisodemain", "avgendtimeofepisodemain", "avgmidpointofepisodemain", "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain")

    fullnames = ["local_segment"]
    for feature in intraday_features_to_compute:
        if feature == "avgduration":
            fullnames += ["avgduration" + level + "main" + suffix for level in levels_with_group for suffix in day_suffixes]
        elif feature == "avgratioduration":
            fullnames += ["avgratioduration" + level + "withinmain" + suffix for level in levels_with_group for suffix in day_suffixes]
        elif feature in per_day_type_features:
            fullnames += [feature + suffix for suffix in day_suffixes]
        else:
            fullnames.append(feature)

    return fullnames
|
||||
|
||||
def mergeSleepEpisodes(sleep_data, cols_for_groupby, base_sleep_levels):
    """Merge level-wise rows into one row per group of ``cols_for_groupby``.

    Each output row carries the summed ``duration`` of the group as
    ``durationinbed``, the start fields of the group's first row, the end
    fields of its last row, and one ``duration<level><group>`` column per
    entry of ``base_sleep_levels``. The ``type_episode_id`` column is removed
    from the merged result.

    When ``cols_for_groupby`` is empty or ``sleep_data`` has no rows, an empty
    frame with the expected columns is returned.
    """
    level_names = [
        level + group.lower()
        for group in base_sleep_levels
        for level in base_sleep_levels[group]
    ]

    if not cols_for_groupby or sleep_data.empty:
        return pd.DataFrame(columns=["local_segment", "durationinbed", "start_timestamp", "end_timestamp", "local_start_date_time", "local_end_date_time"] + ["duration" + name for name in level_names])

    grouped = sleep_data.groupby(by=cols_for_groupby)
    merged = grouped[["duration"]].sum().rename(columns={"duration": "durationinbed"})

    # Start fields come from the first row of each group, end fields from the last
    merged["start_timestamp"] = grouped["start_timestamp"].first()
    merged["end_timestamp"] = grouped["end_timestamp"].last()
    merged["local_start_date_time"] = grouped["local_start_date_time"].first()
    merged["local_end_date_time"] = grouped["local_end_date_time"].last()

    for name in level_names:
        # Rows are matched on the bare level name, i.e. with the group suffix stripped
        bare_level = name.replace("classic", "").replace("stages", "").replace("unified", "")
        merged["duration" + name] = grouped.apply(
            lambda group, bare_level=bare_level: group.loc[group["level"] == bare_level, "duration"].sum()
        )

    merged = merged.reset_index(drop=False)
    del merged["type_episode_id"]

    return merged
|
||||
|
||||
def extractDailyFeatures(sleep_data):
    """Summarise sleep episodes into one row per (local_segment, fake_date).

    For each day the result holds the ``start_minutes`` of the first row, the
    ``end_minutes`` of the last row, their midpoint, the summed
    ``durationinbed`` and, for every other ``duration*`` column, its daily sum
    plus its ratio to the daily in-bed duration. An ``is_weekend`` flag is
    derived from ``fake_date``.
    """
    per_day = sleep_data.groupby(["local_segment", "fake_date"])

    daily = per_day[["start_minutes"]].first().rename(columns={"start_minutes": "starttimeofepisodemain"})
    daily["endtimeofepisodemain"] = per_day["end_minutes"].last()
    daily["midpointofepisodemain"] = (daily["starttimeofepisodemain"] + daily["endtimeofepisodemain"]) / 2
    daily["durationinbedmain"] = per_day["durationinbed"].sum()

    # Per-level duration columns: everything named duration* except the in-bed total
    level_duration_cols = [col for col in sleep_data.columns if col.startswith("duration") and col != "durationinbed"]
    for col in level_duration_cols:
        daily[col + "main"] = per_day[col].sum().fillna(0)
        daily["ratio" + col + "withinmain"] = daily[col + "main"] / daily["durationinbedmain"]

    daily.reset_index(inplace=True)

    # dayofweek: Monday=0 ... Sunday=6. Friday (4) and Saturday (5) count as weekend, the rest as weekdays.
    weekdays = pd.to_datetime(daily["fake_date"]).dt.dayofweek
    daily["is_weekend"] = weekdays.apply(lambda weekday: 1 if weekday in (4, 5) else 0)

    return daily
|
||||
|
||||
def statsOfDailyFeatures(daily_features, day_type, sleep_levels, intraday_features_to_compute, sleep_intraday_features):
    """Aggregate daily sleep features per ``local_segment`` for one day type.

    Parameters:
        daily_features: one row per (local_segment, day) with an ``is_weekend``
            flag (1 = weekend day, 0 = week day) and the daily feature columns.
        day_type: ``"WEEKEND"``, ``"WEEK"`` or ``"ALL"``; selects which days
            enter the statistics and is appended (lowercased) to column names.
        sleep_levels: mapping of level group -> list of levels used for the
            duration and ratio features.
        intraday_features_to_compute: names of the requested base features.
        sleep_intraday_features: features computed so far; new columns are
            concatenated to it column-wise.

    Returns:
        ``sleep_intraday_features`` extended with the requested statistics, or
        unchanged when no day matches ``day_type``.

    Raises:
        ValueError: if ``day_type`` is not one of the supported values.
    """
    # is_weekend == 1 marks weekend days, the same convention socialJetLagFeature uses
    if day_type == "WEEKEND":
        daily_features = daily_features[daily_features["is_weekend"] == 1]
    elif day_type == "WEEK":
        daily_features = daily_features[daily_features["is_weekend"] == 0]
    elif day_type != "ALL":
        raise ValueError("Please make sure the [FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE][DAY_TYPES] parameter in config.yaml file only contains the subset of [WEEKEND, WEEK, ALL].")

    if daily_features.empty:
        return sleep_intraday_features

    # Drop the placeholder columns of an empty frame so they do not clash with the real ones
    if sleep_intraday_features.empty:
        sleep_intraday_features = pd.DataFrame()

    suffix = day_type.lower()

    # Each request is (daily column, aggregation name, output column name)
    requests = []

    # Average, then standard deviation, of the time related features
    for prefix, stat in (("avg", "mean"), ("std", "std")):
        for col in ("starttimeofepisodemain", "endtimeofepisodemain", "midpointofepisodemain"):
            if prefix + col in intraday_features_to_compute:
                requests.append((col, stat, prefix + col + suffix))

    # Duration & ratio features
    for sleep_level_group in sleep_levels:
        for sleep_level in sleep_levels[sleep_level_group]:
            level_key = sleep_level + sleep_level_group.lower()
            if "avgduration" in intraday_features_to_compute:
                col = "duration" + level_key + "main"
                requests.append((col, "mean", "avg" + col + suffix))
            if "avgratioduration" in intraday_features_to_compute:
                col = "ratioduration" + level_key + "withinmain"
                requests.append((col, "mean", "avg" + col + suffix))

    grouped = daily_features.groupby("local_segment")
    for source_col, stat, feature_name in requests:
        feature = getattr(grouped[source_col], stat)().rename(feature_name).to_frame()
        sleep_intraday_features = pd.concat([sleep_intraday_features, feature], axis=1)

    return sleep_intraday_features
|
||||
|
||||
def socialJetLagFeature(daily_features, sleep_intraday_features):
    """Append the ``socialjetlag`` column to ``sleep_intraday_features``.

    Social jet lag is, per ``local_segment``, the mean daily start time of
    weekend days (``is_weekend == 1``) minus the mean daily start time of
    week days (``is_weekend == 0``).
    """
    # Weekend first, then weekday, each stored in a temporary helper column
    for flag, helper_name in ((1, "helper_weekend"), (0, "helper_weekday")):
        subset = daily_features[daily_features["is_weekend"] == flag]
        mean_start = subset[["local_segment", "starttimeofepisodemain"]].groupby("local_segment")["starttimeofepisodemain"].mean()
        helper = mean_start.to_frame().rename(columns={"starttimeofepisodemain": helper_name})
        sleep_intraday_features = pd.concat([sleep_intraday_features, helper], axis=1)

    sleep_intraday_features["socialjetlag"] = sleep_intraday_features["helper_weekend"] - sleep_intraday_features["helper_weekday"]

    # The helpers are only needed to compute the difference
    return sleep_intraday_features.drop(columns=["helper_weekend", "helper_weekday"])
|
||||
|
||||
def MSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features):
    """Append mean/median squared successive difference (SSD) features.

    For the daily start time, end time and midpoint, the difference between
    each day and the previous calendar day is squared; the requested
    ``meanssd*`` / ``medianssd*`` features are the mean / median of those
    squared differences per ``local_segment`` (units: minutes squared).

    Parameters:
        daily_features: one row per day with ``local_segment``, ``fake_date``
            and the three daily time columns.
        intraday_features_to_compute: names of the requested features.
        sleep_intraday_features: features computed so far; new columns are
            concatenated to it column-wise.

    Returns:
        ``sleep_intraday_features`` extended with the requested SSD columns.
    """
    # Re-index on every calendar day between the first and last date. Days without
    # data become NaN rows, so a difference is only produced for consecutive days.
    date_idx = pd.DataFrame(pd.date_range(start=daily_features["fake_date"].min(), end=daily_features["fake_date"].max(), freq="D"), columns=["fake_date"])
    date_idx["fake_date"] = date_idx["fake_date"].dt.date
    daily_features = daily_features.merge(date_idx, on="fake_date", how="right")

    time_cols = ("starttimeofepisodemain", "endtimeofepisodemain", "midpointofepisodemain")
    for col in time_cols:
        # Squared successive difference; the plain difference would cancel out
        # positive and negative shifts and is not what the features are documented to be
        daily_features[col + "_ssd"] = daily_features[col].diff() ** 2

    # Rows added for missing days have no local_segment and are dropped by the groupby
    grouped = daily_features.groupby("local_segment")
    for prefix, stat in (("meanssd", "mean"), ("medianssd", "median")):
        for col in time_cols:
            feature_name = prefix + col
            if feature_name in intraday_features_to_compute:
                feature = getattr(grouped[col + "_ssd"], stat)().rename(feature_name).to_frame()
                sleep_intraday_features = pd.concat([sleep_intraday_features, feature], axis=1)

    return sleep_intraday_features
|
||||
|
||||
|
||||
|
||||
|
||||
def price_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the PRICE provider's Fitbit sleep intraday features.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is the path of the
            CSV file with the sleep intraday data.
        time_segment: time segment handed to ``filter_data_by_segment``.
        provider: provider configuration. The keys read here are
            GROUP_EPISODES_WITHIN (START_TIME and LENGTH), FEATURES,
            SLEEP_LEVELS and DAY_TYPES.
        filter_data_by_segment: callable used to keep the rows that belong to
            ``time_segment``.

    Returns:
        The feature DataFrame with "local_segment" as a regular column, or an
        empty DataFrame that only carries the expected feature columns when
        the input is empty or contains no main sleep records.
    """
    # START_TIME is compared against minutes after midnight below.
    daily_start_time = provider["GROUP_EPISODES_WITHIN"]["START_TIME"]
    daily_end_time = daily_start_time + provider["GROUP_EPISODES_WITHIN"]["LENGTH"]

    sleep_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
    requested_intraday_features = provider["FEATURES"]
    requested_sleep_levels = provider["SLEEP_LEVELS"]
    requested_day_types = provider["DAY_TYPES"]

    # Everything this provider knows how to compute
    base_intraday_features = ["avgduration", "avgratioduration", "avgstarttimeofepisodemain", "avgendtimeofepisodemain", "avgmidpointofepisodemain", "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", "socialjetlag", "meanssdstarttimeofepisodemain", "meanssdendtimeofepisodemain", "meanssdmidpointofepisodemain", "medianssdstarttimeofepisodemain", "medianssdendtimeofepisodemain", "medianssdmidpointofepisodemain"]
    base_sleep_levels = {
        "CLASSIC": ["awake", "restless", "asleep"],
        "STAGES": ["wake", "deep", "light", "rem"],
        "UNIFIED": ["awake", "asleep"],
    }
    base_day_types = ["WEEKEND", "WEEK", "ALL"]

    # Requested items restricted to the supported ones
    intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features))
    sleep_levels_to_compute = {
        key: list(set(requested_sleep_levels[key]) & set(base_sleep_levels[key]))
        for key in requested_sleep_levels
        if key in base_sleep_levels
    }
    day_types_to_compute = list(set(requested_day_types) & set(base_day_types))

    # Empty frame with the full feature names; returned as-is when there is nothing to compute
    features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute)
    sleep_intraday_features = pd.DataFrame(columns=features_fullnames)

    if sleep_intraday_data.empty:
        return sleep_intraday_features

    # Filter by segments and chunk episodes
    sleep_intraday_data = filter_data_by_segment(sleep_intraday_data, time_segment)

    # Discard segments shorter than one day (timestamps are divided by 1000 to get seconds)
    segment_seconds = (sleep_intraday_data["segment_end_timestamp"] - sleep_intraday_data["segment_start_timestamp"]) / 1000
    sleep_intraday_data = sleep_intraday_data[segment_seconds >= 24 * 60 * 60 - 1]

    # Keep main sleep records only
    sleep_intraday_data = sleep_intraday_data[sleep_intraday_data["is_main_sleep"] == 1]
    if sleep_intraday_data.empty:
        return sleep_intraday_features

    # Merge rows to get sleep episodes
    main_sleep_episodes = mergeSleepEpisodes(sleep_intraday_data, ["local_segment", "type_episode_id"], base_sleep_levels)

    def minutes_after_midnight(moment):
        return moment.hour * 60 + moment.minute + moment.second / 60

    def fake_date_offset(row):
        # Rule used to pick the "fake date" of an episode, with
        #   DS = daily_start_time, DE = daily_end_time,
        #   start = start_minutes, end = end_minutes:
        #
        #   if (DS <= start < DE) or (DS < end <= DE) or (start <= DS and end >= DE):
        #       assign today (0)
        #   elif end <= DS:
        #       assign yesterday (-1)
        #   else (same as start >= DE):
        #       assign tomorrow (+1)
        start, end = row["start_minutes"], row["end_minutes"]
        starts_inside = start >= daily_start_time and start < daily_end_time
        ends_inside = end > daily_start_time and end <= daily_end_time
        covers_window = start <= daily_start_time and end >= daily_end_time
        if starts_inside or ends_inside or covers_window:
            return 0
        if end <= daily_start_time:
            return -1
        return 1

    def shifted_start(row):
        return row["start_minutes"] - 24 * 60 * row["fake_date_delta"]

    # Start time as minutes after midnight; adding the time in bed gives the end time
    main_sleep_episodes["start_minutes"] = main_sleep_episodes["local_start_date_time"].apply(minutes_after_midnight)
    main_sleep_episodes["end_minutes"] = main_sleep_episodes["start_minutes"] + main_sleep_episodes["durationinbed"]

    # Extract fake date
    main_sleep_episodes["fake_date_delta"] = main_sleep_episodes[["start_minutes", "end_minutes"]].apply(fake_date_offset, axis=1)
    main_sleep_episodes["fake_date"] = (main_sleep_episodes["local_start_date_time"] + pd.to_timedelta(main_sleep_episodes["fake_date_delta"], unit="d")).dt.date

    # Re-express "start_minutes" (and therefore "end_minutes") relative to the fake date
    main_sleep_episodes["start_minutes"] = main_sleep_episodes[["start_minutes", "fake_date_delta"]].apply(shifted_start, axis=1)
    main_sleep_episodes["end_minutes"] = main_sleep_episodes["start_minutes"] + main_sleep_episodes["durationinbed"]

    # Keep the sleep episodes that intersect or contain [START_TIME, START_TIME + LENGTH],
    # aka [daily_start_time, daily_end_time]. The query reads both names from this scope.
    main_sleep_episodes = main_sleep_episodes.query("(start_minutes >= @daily_start_time and start_minutes < @daily_end_time) or (end_minutes > @daily_start_time and end_minutes <= @daily_end_time) or (start_minutes <= @daily_start_time and end_minutes >= @daily_end_time)")

    # Sort main sleep episodes based on fake_date and start_minutes
    main_sleep_episodes = main_sleep_episodes.sort_values(["fake_date", "start_minutes"])

    # Extract daily features
    daily_features = extractDailyFeatures(main_sleep_episodes)

    # Extract features per segment based on daily features
    for day_type in day_types_to_compute:
        sleep_intraday_features = statsOfDailyFeatures(daily_features, day_type, sleep_levels_to_compute, intraday_features_to_compute, sleep_intraday_features)
    if "socialjetlag" in intraday_features_to_compute:
        sleep_intraday_features = socialJetLagFeature(daily_features, sleep_intraday_features)
    sleep_intraday_features = MSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features)

    # Expose the segment label as a regular column
    sleep_intraday_features.index.name = "local_segment"
    sleep_intraday_features = sleep_intraday_features.reset_index()

    return sleep_intraday_features
|
|
@ -0,0 +1,265 @@
|
|||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import itertools
|
||||
|
||||
def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sleep_types_to_compute, consider_all):
    """Build the ordered list of output column names for the requested features.

    Args:
        intraday_features_to_compute: dict with the keys "LEVELS_AND_TYPES",
            "RATIOS_TYPE", "RATIOS_SCOPE" and "ROUTINE".
        sleep_levels_to_compute: dict mapping a level group (e.g. "CLASSIC")
            to the list of levels requested for that group.
        sleep_types_to_compute: list of requested sleep types.
        consider_all: when true, "all" is added as an extra level and an extra
            type for the LEVELS_AND_TYPES names.

    Returns:
        A list starting with "local_segment", followed by the
        LEVELS_AND_TYPES names, the ratio names of each requested scope and
        finally the ROUTINE names.
    """
    # A level is identified by its name followed by its lowercased group, e.g. "remstages"
    sleep_level_with_group = [
        sleep_level + sleep_level_group.lower()
        for sleep_level_group in sleep_levels_to_compute
        for sleep_level in sleep_levels_to_compute[sleep_level_group]
    ]

    if consider_all:
        level_options = sleep_level_with_group + ["all"]
        type_options = sleep_types_to_compute + ["all"]
    else:
        level_options = sleep_level_with_group
        type_options = sleep_types_to_compute

    features_fullname = ["local_segment"]
    features_fullname += [
        stat + level + sleep_type
        for stat, level, sleep_type in itertools.product(intraday_features_to_compute["LEVELS_AND_TYPES"], level_options, type_options)
    ]

    ratios_scope = intraday_features_to_compute["RATIOS_SCOPE"]

    if "ACROSS_LEVELS" in ratios_scope:
        features_fullname += [
            "ratio" + ratios_type + level
            for ratios_type, level in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], sleep_level_with_group)
        ]

    # The across-types ratio only exists for main sleep
    if "ACROSS_TYPES" in ratios_scope and "main" in sleep_types_to_compute:
        features_fullname += ["ratio" + ratios_type + "main" for ratios_type in intraday_features_to_compute["RATIOS_TYPE"]]

    if "WITHIN_LEVELS" in ratios_scope:
        features_fullname += [
            "ratio" + ratios_type + sleep_type + "within" + level
            for ratios_type, sleep_type, level in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], sleep_types_to_compute, sleep_level_with_group)
        ]

    if "WITHIN_TYPES" in ratios_scope:
        features_fullname += [
            "ratio" + ratios_type + level + "within" + sleep_type
            for ratios_type, level, sleep_type in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], sleep_level_with_group, sleep_types_to_compute)
        ]

    features_fullname += list(intraday_features_to_compute["ROUTINE"])

    return features_fullname
|
||||
|
||||
def mergeSleepEpisodes(sleep_data, cols_for_groupby):
    """Collapse the rows of each group into a single sleep episode.

    Args:
        sleep_data: DataFrame with the columns "duration", "start_timestamp",
            "end_timestamp", "local_start_date_time" and
            "local_end_date_time", plus the grouping columns.
        cols_for_groupby: columns that identify an episode.

    Returns:
        One row per group: the grouping columns, the summed duration, the
        first start timestamp/datetime and the last end timestamp/datetime.
        When ``cols_for_groupby`` is empty or ``sleep_data`` has no rows, an
        empty DataFrame is returned instead.
    """
    # Nothing to merge
    if not cols_for_groupby or sleep_data.empty:
        return pd.DataFrame(columns=["local_segment", "duration", "start_timestamp", "end_timestamp", "local_start_date_time", "local_end_date_time"])

    grouped = sleep_data.groupby(by=cols_for_groupby)

    # Total duration of the episode, bounded by its first start and its last end
    merged = grouped[["duration"]].sum()
    merged["start_timestamp"] = grouped["start_timestamp"].first()
    merged["end_timestamp"] = grouped["end_timestamp"].last()
    merged["local_start_date_time"] = grouped["local_start_date_time"].first()
    merged["local_end_date_time"] = grouped["local_end_date_time"].last()

    # Turn the grouping keys back into regular columns
    merged = merged.reset_index(drop=False)

    return merged
|
||||
|
||||
def statsFeatures(sleep_episodes, features, episode_type):
    """Compute per-segment statistics of the episode durations.

    Args:
        sleep_episodes: DataFrame with the columns "local_segment" and
            "duration".
        features: names of the statistics to compute; the supported ones are
            "countepisode", "sumduration", "maxduration", "minduration",
            "avgduration", "medianduration" and "stdduration".
        episode_type: suffix appended to every feature name.

    Returns:
        A DataFrame with one column per entry of ``features`` (named
        feature + episode_type) indexed by "local_segment". The frame has no
        rows when ``sleep_episodes`` is empty.
    """
    episode_features = pd.DataFrame(columns=[feature + episode_type for feature in features])
    if sleep_episodes.empty:
        return episode_features

    # Feature name -> groupby aggregation producing it
    aggregations = (
        ("countepisode", "count"),
        ("sumduration", "sum"),
        ("maxduration", "max"),
        ("minduration", "min"),
        ("avgduration", "mean"),
        ("medianduration", "median"),
        ("stdduration", "std"),
    )
    requested = [(name, method) for name, method in aggregations if name in features]

    if requested:
        durations_by_segment = sleep_episodes[["local_segment", "duration"]].groupby(["local_segment"])["duration"]
        for name, method in requested:
            episode_features[name + episode_type] = getattr(durations_by_segment, method)()

    return episode_features
|
||||
|
||||
def allStatsFeatures(sleep_data, base_sleep_levels, base_sleep_types, features, sleep_intraday_features):
    """Append the stats features of every level/type combination.

    For each of the CLASSIC, STAGES and UNIFIED groups, every level of the
    group (plus "all") is combined with every sleep type (plus "all"), and the
    resulting ``statsFeatures`` columns are concatenated to
    ``sleep_intraday_features``. A last pass ignores the levels altogether.

    Args:
        sleep_data: DataFrame with at least the columns "is_main_sleep",
            "level" and "unified_level", plus those required by
            ``statsFeatures`` and ``mergeSleepEpisodes``.
        base_sleep_levels: dict with the keys "CLASSIC", "STAGES" and
            "UNIFIED", each mapping to a list of level names.
        base_sleep_types: list of sleep types; "main" selects rows with
            is_main_sleep == 1, any other type selects is_main_sleep == 0.
        features: stats feature names forwarded to ``statsFeatures``.
        sleep_intraday_features: DataFrame the new columns are appended to.

    Returns:
        The concatenation of ``sleep_intraday_features`` and all new columns.
    """
    type_options = base_sleep_types + ["all"]

    def rows_of_type(sleep_type):
        # "all" keeps every row; otherwise filter on the main-sleep flag
        if sleep_type == "all":
            return sleep_data
        return sleep_data[sleep_data["is_main_sleep"] == (1 if sleep_type == "main" else 0)]

    # CLASSIC and STAGES: levels are matched against the "level" column
    for group in ("CLASSIC", "STAGES"):
        for sleep_level, sleep_type in itertools.product(base_sleep_levels[group] + ["all"], type_options):
            episodes = rows_of_type(sleep_type)
            if sleep_level != "all":
                episodes = episodes[episodes["level"] == sleep_level]
            sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(episodes, features, sleep_level + group.lower() + sleep_type)], axis=1)

    # UNIFIED: levels are matched against "unified_level" (0 for awake, 1 otherwise)
    # and consecutive rows are merged into unified episodes first
    for sleep_level, sleep_type in itertools.product(base_sleep_levels["UNIFIED"] + ["all"], type_options):
        episodes = rows_of_type(sleep_type)
        if sleep_level != "all":
            episodes = episodes[episodes["unified_level"] == (0 if sleep_level == "awake" else 1)]
        episodes = mergeSleepEpisodes(episodes, ["local_segment", "unified_level_episode_id"])
        sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(episodes, features, sleep_level + "unified" + sleep_type)], axis=1)

    # Ignore the levels (e.g. countepisode[all][main])
    for sleep_type in type_options:
        episodes = mergeSleepEpisodes(rows_of_type(sleep_type), ["local_segment", "type_episode_id"])
        sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(episodes, features, "all" + sleep_type)], axis=1)

    return sleep_intraday_features
|
||||
|
||||
|
||||
# Since all the stats features have been computed regardless of whether they were requested,
|
||||
# we can pick the related features to calculate the RATIOS features directly.
|
||||
# Take ACROSS_LEVELS RATIOS features as an example:
|
||||
# ratiocount[remstages] = countepisode[remstages][all] / countepisode[all][all]
|
||||
def ratiosFeatures(sleep_intraday_features, ratios_types, ratios_scopes, sleep_levels, sleep_types):
    """Append the requested ratio features, derived from existing stats columns.

    "count" ratios divide "countepisode" columns; any other ratio type divides
    "sumduration" columns. For example, for the ACROSS_LEVELS scope:
        ratiocount[remstages] = countepisode[remstages][all] / countepisode[all][stages][all]

    Args:
        sleep_intraday_features: DataFrame that already holds the
            countepisode/sumduration columns used as numerators and
            denominators.
        ratios_types: iterable of ratio types, e.g. ["count", "duration"].
        ratios_scopes: collection of scopes among "ACROSS_LEVELS",
            "ACROSS_TYPES", "WITHIN_LEVELS" and "WITHIN_TYPES".
        sleep_levels: dict mapping a level group to its list of levels, e.g.
            {"CLASSIC": ["awake", "restless", "asleep"], "UNIFIED": ["awake", "asleep"]}.
        sleep_types: iterable of sleep types, e.g. ["main", "nap"].

    Returns:
        ``sleep_intraday_features`` with one extra column per computed ratio.
    """

    def stat_prefix(ratios_type):
        return "countepisode" if ratios_type == "count" else "sumduration"

    def ratio_column(features, numerator, denominator, label):
        # Dividing two differently named columns yields an unnamed Series,
        # which to_frame() labels 0; that label is then replaced by the ratio name.
        return (features[numerator] / features[denominator]).to_frame().rename(columns={0: label})

    # Pair every level with its lowercased group, e.g.
    # {"CLASSIC": ["awake"], "UNIFIED": ["asleep"]} -> [("classic", "awake"), ("unified", "asleep")]
    group_level_pairs = [
        (sleep_level_group.lower(), sleep_level)
        for sleep_level_group in sleep_levels
        for sleep_level in sleep_levels[sleep_level_group]
    ]

    # ACROSS LEVELS: one ratio per (ratio type, level), relative to all levels of the same group
    if "ACROSS_LEVELS" in ratios_scopes:
        for ratios_type, (sleep_level_group, sleep_level) in itertools.product(ratios_types, group_level_pairs):
            prefix = stat_prefix(ratios_type)
            across_levels = ratio_column(
                sleep_intraday_features,
                prefix + sleep_level + sleep_level_group + "all",
                prefix + "all" + sleep_level_group + "all",
                "ratio" + ratios_type + sleep_level + sleep_level_group,
            )
            sleep_intraday_features = pd.concat([sleep_intraday_features, across_levels], axis=1)

    # ACROSS TYPES: main sleep relative to all sleep, ignoring the levels
    if "ACROSS_TYPES" in ratios_scopes:
        for ratios_type in ratios_types:
            prefix = stat_prefix(ratios_type)
            across_types = ratio_column(
                sleep_intraday_features,
                prefix + "allmain",
                prefix + "allall",
                "ratio" + ratios_type + "main",
            )
            sleep_intraday_features = pd.concat([sleep_intraday_features, across_types], axis=1)

    # WITHIN LEVELS / WITHIN TYPES: one ratio per (ratio type, level, sleep type), e.g.
    # ("count", ("classic", "awake"), "main"), ("count", ("classic", "awake"), "nap"), ...
    for ratios_type, (sleep_level_group, sleep_level), sleep_type in itertools.product(ratios_types, group_level_pairs, sleep_types):
        prefix = stat_prefix(ratios_type)
        level_and_type = prefix + sleep_level + sleep_level_group + sleep_type

        # WITHIN LEVELS: share of this sleep type inside the level
        if "WITHIN_LEVELS" in ratios_scopes:
            within_levels = ratio_column(
                sleep_intraday_features,
                level_and_type,
                prefix + sleep_level + sleep_level_group + "all",
                "ratio" + ratios_type + sleep_type + "within" + sleep_level + sleep_level_group,
            )
            sleep_intraday_features = pd.concat([sleep_intraday_features, within_levels], axis=1)

        # WITHIN TYPES: share of this level inside the sleep type
        if "WITHIN_TYPES" in ratios_scopes:
            within_types = ratio_column(
                sleep_intraday_features,
                level_and_type,
                prefix + "all" + sleep_level_group + sleep_type,
                "ratio" + ratios_type + sleep_level + sleep_level_group + "within" + sleep_type,
            )
            sleep_intraday_features = pd.concat([sleep_intraday_features, within_types], axis=1)

    return sleep_intraday_features
|
||||
|
||||
|
||||
def singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, reference_time, sleep_type, sleep_intraday_features):
    """Add the routine features (first start / last end) of one sleep type.

    Args:
        sleep_intraday_data: DataFrame with the columns "is_main_sleep",
            "local_segment", "local_start_date_time", "local_end_date_time",
            "start_timestamp", "end_timestamp" and "segment_start_timestamp".
            The local datetime values must expose hour/minute/second.
        routine: collection of requested routine feature names.
        reference_time: "MIDNIGHT" to express times as minutes after
            midnight, or "START_OF_THE_SEGMENT" to express them as the
            timestamp difference to the segment start divided by 60 * 1000.
        sleep_type: "mainsleep" selects rows with is_main_sleep == 1; any
            other value selects rows with is_main_sleep == 0.
        sleep_intraday_features: DataFrame that receives the new columns; it
            is modified in place.

    Returns:
        ``sleep_intraday_features`` with "starttimefirst" + sleep_type and/or
        "endtimelast" + sleep_type set when they are part of ``routine``.

    Raises:
        ValueError: if a feature is requested and ``reference_time`` is
            neither "MIDNIGHT" nor "START_OF_THE_SEGMENT".
    """

    def minutes_after_midnight(moment):
        return moment.hour * 60 + moment.minute + moment.second / 60

    main_flag = 1 if sleep_type == "mainsleep" else 0
    typed_rows = sleep_intraday_data[sleep_intraday_data["is_main_sleep"] == main_flag]

    # (feature prefix, row picked per segment, local datetime column, timestamp column)
    routine_specs = (
        ("starttimefirst", "first", "local_start_date_time", "start_timestamp"),
        ("endtimelast", "last", "local_end_date_time", "end_timestamp"),
    )

    for prefix, picker, datetime_column, timestamp_column in routine_specs:
        feature_name = prefix + sleep_type
        if feature_name not in routine:
            continue

        picked = getattr(typed_rows.groupby(["local_segment"]), picker)()

        if reference_time == "MIDNIGHT":
            sleep_intraday_features[feature_name] = picked[datetime_column].apply(minutes_after_midnight)
        elif reference_time == "START_OF_THE_SEGMENT":
            sleep_intraday_features[feature_name] = (picked[timestamp_column] - picked["segment_start_timestamp"]) / (60 * 1000)
        else:
            raise ValueError("Please check FITBIT_SLEEP_INTRADAY section of config.yaml: REFERENCE_TIME can only be MIDNIGHT or START_OF_THE_SEGMENT.")

    return sleep_intraday_features
|
||||
|
||||
def routineFeatures(sleep_intraday_data, routine, reference_time, sleep_type, sleep_intraday_features):
    """Add the requested routine features for main sleep and naps.

    Args:
        sleep_intraday_data: sleep data forwarded to
            ``singleSleepTypeRoutineFeatures``.
        routine: collection of requested routine feature names.
        reference_time: reference forwarded to
            ``singleSleepTypeRoutineFeatures``.
        sleep_type: not used by this function.
        sleep_intraday_features: DataFrame the routine columns are added to.

    Returns:
        The feature DataFrame after processing each sleep type that has at
        least one requested routine feature.
    """
    # Main sleep first, then naps; a type is skipped when none of its features is requested
    for type_suffix in ("mainsleep", "nap"):
        if "starttimefirst" + type_suffix in routine or "endtimelast" + type_suffix in routine:
            sleep_intraday_features = singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, reference_time, type_suffix, sleep_intraday_features)

    return sleep_intraday_features
|
||||
|
||||
|
||||
def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the RAPIDS provider's Fitbit sleep intraday features.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is the path of the
            CSV file with the sleep intraday data.
        time_segment: time segment handed to ``filter_data_by_segment``.
        provider: provider configuration. The keys read here are FEATURES
            (including LEVELS_AND_TYPES_COMBINING_ALL), SLEEP_LEVELS,
            SLEEP_TYPES, INCLUDE_SLEEP_LATER_THAN and REFERENCE_TIME.
        filter_data_by_segment: callable used to keep the rows that belong to
            ``time_segment``.

    Returns:
        A DataFrame restricted to the requested feature columns (with
        "local_segment" first), or an empty DataFrame with those columns when
        no data is left after filtering.
    """
    sleep_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    consider_all = provider["FEATURES"]["LEVELS_AND_TYPES_COMBINING_ALL"]
    include_sleep_later_than = provider["INCLUDE_SLEEP_LATER_THAN"]
    reference_time = provider["REFERENCE_TIME"]

    requested_intraday_features = provider["FEATURES"]
    requested_sleep_levels = provider["SLEEP_LEVELS"]
    requested_sleep_types = provider["SLEEP_TYPES"]

    # Everything this provider knows how to compute
    base_intraday_features = {
        "LEVELS_AND_TYPES": ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"],
        "RATIOS_TYPE": ["count", "duration"],
        "RATIOS_SCOPE": ["ACROSS_LEVELS", "ACROSS_TYPES", "WITHIN_LEVELS", "WITHIN_TYPES"],
        "ROUTINE": ["starttimefirstmainsleep", "endtimelastmainsleep", "starttimefirstnap", "endtimelastnap"],
    }
    base_sleep_levels = {
        "CLASSIC": ["awake", "restless", "asleep"],
        "STAGES": ["wake", "deep", "light", "rem"],
        "UNIFIED": ["awake", "asleep"],
    }
    base_sleep_types = ["main", "nap"]

    # Requested items restricted to the supported ones
    intraday_features_to_compute = {
        key: list(set(requested_intraday_features[key]) & set(base_intraday_features[key]))
        for key in requested_intraday_features
        if key in base_intraday_features
    }
    sleep_levels_to_compute = {
        key: list(set(requested_sleep_levels[key]) & set(base_sleep_levels[key]))
        for key in requested_sleep_levels
        if key in base_sleep_levels
    }
    sleep_types_to_compute = list(set(requested_sleep_types) & set(base_sleep_types))

    # Full names of the requested features
    features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sleep_types_to_compute, consider_all)
    sleep_intraday_features = pd.DataFrame(columns=features_fullnames)

    def minutes_after_midnight(local_time):
        # local_time is split on ":" into hours, minutes and seconds
        parts = local_time.split(":")
        return int(parts[0]) * 60 + int(parts[1]) + int(parts[2]) / 60

    # Include sleep later than: the local time of the first row sharing a start_timestamp
    # decides whether all rows with that start_timestamp are kept
    episode_start_minutes = (
        sleep_intraday_data.groupby("start_timestamp").first()["local_time"]
        .apply(minutes_after_midnight)
        .to_frame()
        .rename(columns={"local_time": "start_minutes"})
        .reset_index()
    )
    sleep_intraday_data = sleep_intraday_data.merge(episode_start_minutes, on="start_timestamp", how="left")
    sleep_intraday_data = sleep_intraday_data[sleep_intraday_data["start_minutes"] >= include_sleep_later_than]
    del sleep_intraday_data["start_minutes"]

    sleep_intraday_data = filter_data_by_segment(sleep_intraday_data, time_segment)

    # While level_episode_id is based on levels provided by Fitbit (classic & stages),
    # unified_level_episode_id is based on unified_level: a new id starts whenever
    # type_episode_id or unified_level differs from the previous row.
    level_keys = sleep_intraday_data[["type_episode_id", "unified_level"]]
    sleep_intraday_data.insert(3, "unified_level_episode_id", (level_keys != level_keys.shift()).any(axis=1).cumsum())

    if sleep_intraday_data.empty:
        return sleep_intraday_features

    # ALL LEVELS AND TYPES: every stats feature is computed, requested or not,
    # because the ratios below are derived from them
    sleep_intraday_features = allStatsFeatures(sleep_intraday_data, base_sleep_levels, base_sleep_types, base_intraday_features["LEVELS_AND_TYPES"], pd.DataFrame())

    # RATIOS: only compute requested features
    sleep_intraday_features = ratiosFeatures(sleep_intraday_features, intraday_features_to_compute["RATIOS_TYPE"], intraday_features_to_compute["RATIOS_SCOPE"], sleep_levels_to_compute, sleep_types_to_compute)

    # ROUTINE: only compute requested features
    sleep_intraday_features = routineFeatures(sleep_intraday_data, intraday_features_to_compute["ROUTINE"], reference_time, sleep_types_to_compute, sleep_intraday_features)

    # Reset index and discard features which are not requested by user
    sleep_intraday_features.index.name = "local_segment"
    sleep_intraday_features = sleep_intraday_features.reset_index()
    sleep_intraday_features = sleep_intraday_features[features_fullnames]

    return sleep_intraday_features
|
|
@ -55,7 +55,7 @@ def chunk_episodes(sensor_episodes):
|
|||
sensor_episodes["duration"] = (sensor_episodes["chunked_end_timestamp"] - sensor_episodes["chunked_start_timestamp"]) / (1000 * 60)
|
||||
|
||||
# Merge episodes
|
||||
cols_for_groupby = [col for col in sensor_episodes.columns if col not in ["timestamps_segment", "timestamp", "assigned_segments", "start_datetime", "end_datetime", "start_timestamp", "end_timestamp", "duration", "segment_start_timestamp", "segment_end_timestamp", "chunked_start_timestamp", "chunked_end_timestamp"]]
|
||||
cols_for_groupby = [col for col in sensor_episodes.columns if col not in ["timestamps_segment", "timestamp", "assigned_segments", "start_datetime", "end_datetime", "start_timestamp", "end_timestamp", "duration", "chunked_start_timestamp", "chunked_end_timestamp"]]
|
||||
|
||||
sensor_episodes_grouped = sensor_episodes.groupby(by=cols_for_groupby)
|
||||
merged_sensor_episodes = sensor_episodes_grouped[["duration"]].sum()
|
||||
|
|
|
@ -29,6 +29,7 @@ required:
|
|||
- FITBIT_HEARTRATE_SUMMARY
|
||||
- FITBIT_HEARTRATE_INTRADAY
|
||||
- FITBIT_SLEEP_SUMMARY
|
||||
- FITBIT_SLEEP_INTRADAY
|
||||
- FITBIT_STEPS_SUMMARY
|
||||
- FITBIT_STEPS_INTRADAY
|
||||
- HISTOGRAM_PHONE_DATA_YIELD
|
||||
|
@ -896,6 +897,136 @@ properties:
|
|||
additionalProperties:
|
||||
$ref: "#/definitions/PROVIDER"
|
||||
|
||||
FITBIT_SLEEP_INTRADAY:
|
||||
type: object
|
||||
required: [TABLE, PROVIDERS]
|
||||
properties:
|
||||
TABLE:
|
||||
type: string
|
||||
PROVIDERS:
|
||||
type: ["null", object]
|
||||
properties:
|
||||
RAPIDS:
|
||||
allOf:
|
||||
- $ref: "#/definitions/PROVIDER"
|
||||
- properties:
|
||||
FEATURES:
|
||||
type: object
|
||||
required: [LEVELS_AND_TYPES_COMBINING_ALL, LEVELS_AND_TYPES, RATIOS_TYPE, RATIOS_SCOPE, ROUTINE]
|
||||
properties:
|
||||
LEVELS_AND_TYPES_COMBINING_ALL:
|
||||
type: boolean
|
||||
LEVELS_AND_TYPES:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [countepisode, sumduration, maxduration, minduration, avgduration, medianduration, stdduration]
|
||||
RATIOS_TYPE:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [count, duration]
|
||||
RATIOS_SCOPE:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [ACROSS_LEVELS, ACROSS_TYPES, WITHIN_LEVELS, WITHIN_TYPES]
|
||||
ROUTINE:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [starttimefirstmainsleep, endtimelastmainsleep, starttimefirstnap, endtimelastnap]
|
||||
SLEEP_LEVELS:
|
||||
type: object
|
||||
required: [CLASSIC, STAGES, UNIFIED]
|
||||
properties:
|
||||
CLASSIC:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [awake, restless, asleep]
|
||||
STAGES:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [wake, deep, light, rem]
|
||||
UNIFIED:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [awake, asleep]
|
||||
SLEEP_TYPES:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [main, nap]
|
||||
INCLUDE_SLEEP_LATER_THAN:
|
||||
type: number
|
||||
minimum: 0
|
||||
maximum: 1439
|
||||
REFERENCE_TIME:
|
||||
type: string
|
||||
enum: [MIDNIGHT, START_OF_THE_SEGMENT]
|
||||
PRICE:
|
||||
allOf:
|
||||
- $ref: "#/definitions/PROVIDER"
|
||||
- properties:
|
||||
FEATURES:
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", socialjetlag, meanssdstarttimeofepisodemain, meanssdendtimeofepisodemain, meanssdmidpointofepisodemain, medianssdstarttimeofepisodemain, medianssdendtimeofepisodemain, medianssdmidpointofepisodemain]
|
||||
SLEEP_LEVELS:
|
||||
type: object
|
||||
required: [CLASSIC, STAGES, UNIFIED]
|
||||
properties:
|
||||
CLASSIC:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [awake, restless, asleep]
|
||||
STAGES:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [wake, deep, light, rem]
|
||||
UNIFIED:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [awake, asleep]
|
||||
DAY_TYPES:
|
||||
type: array
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [WEEKEND, WEEK, ALL]
|
||||
GROUP_EPISODES_WITHIN:
|
||||
type: object
|
||||
required: [START_TIME, LENGTH]
|
||||
properties:
|
||||
START_TIME:
|
||||
type: number
|
||||
minimum: 0
|
||||
maximum: 1439
|
||||
LENGTH:
|
||||
type: number
|
||||
minimum: 0
|
||||
maximum: 1440
|
||||
additionalProperties:
|
||||
$ref: "#/definitions/PROVIDER"
|
||||
|
||||
FITBIT_STEPS_SUMMARY:
|
||||
type: object
|
||||
required: [TABLE, PROVIDERS]
|
||||
|
|
Loading…
Reference in New Issue