From d48194fc078093f3f3ef62fb963c6f8380a28648 Mon Sep 17 00:00:00 2001 From: JulioV Date: Thu, 11 Mar 2021 12:19:44 -0500 Subject: [PATCH] Add fitbitparsed_csv --- config.yaml | 4 + docs/datastreams/data-streams-introduction.md | 18 +- docs/datastreams/fitbitparsed-csv.md | 17 ++ docs/datastreams/fitbitparsed-mysql.md | 235 +----------------- docs/snippets/parsedfitbit_format.md | 233 +++++++++++++++++ src/data/streams/fitbitjson_csv/container.R | 2 +- src/data/streams/fitbitparsed_csv/container.R | 37 +++ src/data/streams/fitbitparsed_csv/format.yaml | 83 +++++++ .../mutations/fitbit/add_zero_timestamp.py | 3 +- 9 files changed, 386 insertions(+), 246 deletions(-) create mode 100644 docs/datastreams/fitbitparsed-csv.md create mode 100644 docs/snippets/parsedfitbit_format.md create mode 100644 src/data/streams/fitbitparsed_csv/container.R create mode 100644 src/data/streams/fitbitparsed_csv/format.yaml diff --git a/config.yaml b/config.yaml index 38c95e4a..5b6750f0 100644 --- a/config.yaml +++ b/config.yaml @@ -350,6 +350,10 @@ FITBIT_DATA_STREAMS: FOLDER: data/external/fitbit_csv SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. + fitbitparsed_csv: + FOLDER: data/external/fitbit_csv + SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. 
+ # Sensors ------ # See https://www.rapids.science/latest/features/fitbit-data-yield/ diff --git a/docs/datastreams/data-streams-introduction.md b/docs/datastreams/data-streams-introduction.md index 36b69d2a..05adc96e 100644 --- a/docs/datastreams/data-streams-introduction.md +++ b/docs/datastreams/data-streams-introduction.md @@ -15,14 +15,10 @@ For reference, these are the data streams we currently support: | Data Stream | Device | Format | Container | Docs |--|--|--|--|--| -| `aware_mysql`| Phone | AWARE app | MySQL | [link]() -| `aware_csv`| Phone | AWARE app | CSV files | [link]() -| `fitbitjson_mysql`| Fitbit | JSON (per Fitbit's API) | MySQL | [link]() -| `fitbitjson_csv`| Fitbit | JSON (per Fitbit's API) | CSV files | [link]() -| `fitbitparsed_mysql`| Fitbit | Parsed (parsed API data) | MySQL | [link]() -| `fitbitparsed_csv`| Fitbit | Parsed (parsed API data) | CSV files | [link]() -| `empatica_zip`| Empatica | E4 Connect | ZIP files | [link]() - -!!! hint - - Fitbit data can be processed from the JSON object produced by Fitbit's API (recommended) or from parsed tabular data (if you only have access to parsed data). - - Empatica data can only be accessed through the [E4 Connect website](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) that produces zip files with a CSV file per sensor which can be processed directly in RAPIDS. 
\ No newline at end of file +| `aware_mysql`| Phone | AWARE app | MySQL | [link](../aware-mysql) +| `aware_csv`| Phone | AWARE app | CSV files | [link](../aware-csv) +| `fitbitjson_mysql`| Fitbit | JSON (per [Fitbit's API](https://dev.fitbit.com/build/reference/web-api/)) | MySQL | [link](../fitbitjson-mysql) +| `fitbitjson_csv`| Fitbit | JSON (per [Fitbit's API](https://dev.fitbit.com/build/reference/web-api/)) | CSV files | [link](../fitbitjson-csv) +| `fitbitparsed_mysql`| Fitbit | Parsed (parsed API data) | MySQL | [link](../fitbitparsed-mysql) +| `fitbitparsed_csv`| Fitbit | Parsed (parsed API data) | CSV files | [link](../fitbitparsed-csv) +| `empatica_zip`| Empatica | [E4 Connect](https://support.empatica.com/hc/en-us/articles/201608896-Data-export-and-formatting-from-E4-connect-) | ZIP files | [link](../empatica-zip) diff --git a/docs/datastreams/fitbitparsed-csv.md b/docs/datastreams/fitbitparsed-csv.md new file mode 100644 index 00000000..9e07b9ca --- /dev/null +++ b/docs/datastreams/fitbitparsed-csv.md @@ -0,0 +1,17 @@ +# `fitbitparsed_csv` +This [data stream](../../datastreams/data-streams-introduction) handles Fitbit sensor data downloaded using the [Fitbit Web API](https://dev.fitbit.com/build/reference/web-api/), **parsed**, and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your parsed sensor data in a CSV file, RAPIDS can process it. + +!!! info "What is the difference between JSON and plain data streams" + Most people will only need `fitbitjson_*` because they downloaded and stored their data directly from Fitbit's API. However, if, for some reason, you don't have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. + +## Container +The container should be a CSV file per sensor, each containing all participants' data. 
+ +The script to connect and download data from this container is at: +```bash +src/data/streams/fitbitparsed_csv/container.R +``` + +## Format + +--8<---- "docs/snippets/parsedfitbit_format.md" diff --git a/docs/datastreams/fitbitparsed-mysql.md b/docs/datastreams/fitbitparsed-mysql.md index a7fa94dc..dc174263 100644 --- a/docs/datastreams/fitbitparsed-mysql.md +++ b/docs/datastreams/fitbitparsed-mysql.md @@ -2,7 +2,7 @@ This [data stream](../../datastreams/data-streams-introduction) handles Fitbit sensor data downloaded using the [Fitbit Web API](https://dev.fitbit.com/build/reference/web-api/), **parsed**, and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your parsed sensor data in a MySQL database, RAPIDS can process it. !!! info "What is the difference between JSON and plain data streams" - Most people will only need `fitbitjson_mysql` because they downloaded and stored their data directly from Fitbit's API. However, if, for some reason, you don't have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. + Most people will only need `fitbitjson_*` because they downloaded and stored their data directly from Fitbit's API. However, if, for some reason, you don't have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. ## Container The container should be a MySQL database with a table per sensor, each containing all participants' data. @@ -14,235 +14,4 @@ src/data/streams/fitbitparsed_mysql/container.R ## Format -The `format.yaml` maps and transforms columns in your raw data stream to the [mandatory columns RAPIDS needs for Fitbit sensors](../mandatory-fitbit-format). 
This file is at: - -```bash -src/data/streams/fitbitparsed_mysql/format.yaml -``` - -If you want to use this stream with your data, modify every sensor in `format.yaml` to map all columns except `TIMESTAMP` in `[RAPIDS_COLUMN_MAPPINGS]` to your raw data column names. - -All columns are mandatory; however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just have in mind that some features will be empty if some of these columns are empty. - - -??? info "FITBIT_HEARTRATE_SUMMARY" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |-----------------|-----------------| - | TIMESTAMP| FLAG_TO_MUTATE | - | LOCAL_DATE_TIME | local_date_time | - | DEVICE_ID | device_id | - | HEARTRATE_DAILY_RESTINGHR | heartrate_daily_restinghr | - | HEARTRATE_DAILY_CALORIESOUTOFRANGE | heartrate_daily_caloriesoutofrange | - | HEARTRATE_DAILY_CALORIESFATBURN | heartrate_daily_caloriesfatburn | - | HEARTRATE_DAILY_CALORIESCARDIO | heartrate_daily_caloriescardio | - | HEARTRATE_DAILY_CALORIESPEAK | heartrate_daily_caloriespeak | - - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/fitbit/add_zero_timestamp.py - ``` - - !!! note - `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. - - ??? 
example "Example of the raw data RAPIDS expects for this data stream" - - |device_id |local_date_time |heartrate_daily_restinghr |heartrate_daily_caloriesoutofrange |heartrate_daily_caloriesfatburn |heartrate_daily_caloriescardio |heartrate_daily_caloriespeak | - |-------------------------------------- |----------------- |------- |-------------- |------------- |------------ |-------| - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 |72 |1200.6102 |760.3020 |15.2048 |0 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-08 |70 |1100.1120 |660.0012 |23.7088 |0 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-09 |69 |750.3615 |734.1516 |131.8579 |0 | - -??? info "FITBIT_HEARTRATE_INTRADAY" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |-----------------|-----------------| - | TIMESTAMP| FLAG_TO_MUTATE | - | LOCAL_DATE_TIME | local_date_time | - | DEVICE_ID | device_id | - | HEARTRATE | heartrate | - | HEARTRATE_ZONE | heartrate_zone | - - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/fitbit/add_zero_timestamp.py - ``` - - !!! note - `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. - - ??? example "Example of the raw data RAPIDS expects for this data stream" - - |device_id |local_date_time |heartrate |heartrate_zone | - |-------------------------------------- |---------------------- |--------- |--------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:00:00 |68 |outofrange | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:01:00 |67 |outofrange | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:02:00 |67 |outofrange | - -??? 
info "FITBIT_SLEEP_SUMMARY" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |-----------------|-----------------| - | TIMESTAMP| FLAG_TO_MUTATE | - | LOCAL_DATE_TIME| local_date_time | - | LOCAL_START_DATE_TIME| local_start_date_time | - | LOCAL_END_DATE_TIME| local_end_date_time | - | DEVICE_ID| device_id | - | EFFICIENCY| efficiency | - | MINUTES_AFTER_WAKEUP| minutes_after_wakeup | - | MINUTES_ASLEEP| minutes_asleep | - | MINUTES_AWAKE| minutes_awake | - | MINUTES_TO_FALL_ASLEEP| minutes_to_fall_asleep | - | MINUTES_IN_BED| minutes_in_bed | - | IS_MAIN_SLEEP| is_main_sleep | - | TYPE| type | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/fitbit/add_zero_timestamp.py - ``` - - !!! note - `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. - - Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1's `count_awake`, `duration_awake`, and `count_awakenings`, `count_restless`, `duration_restless` columns. - - ??? 
example "Example of the expected raw data" - - |device_id |local_start_date_time |local_end_date_time |efficiency |minutes_after_wakeup |minutes_asleep |minutes_awake |minutes_to_fall_asleep |minutes_in_bed |is_main_sleep |type | - |-------------------------------------- |---------------------- |---------------------- |----------- |--------------------- |--------------- |-------------- |----------------------- |--------------- |-------------- |-------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-10 15:36:30 |2020-10-10 16:37:00 |92 |0 |55 |5 |0 |60 |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-10 01:46:30 |2020-10-10 08:10:00 |88 |0 |318 |65 |0 |383 |1 |stages | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-11 00:12:30 |2020-10-11 11:47:00 |89 |1 |562 |132 |0 |694 |1 |stages | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-12 01:31:00 |2020-10-12 09:34:30 |93 |0 |415 |68 |0 |483 |1 |stages | - - -??? info "FITBIT_SLEEP_INTRADAY" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |-----------------|-----------------| - | TIMESTAMP | FLAG_TO_MUTATE | - | LOCAL_DATE_TIME | local_date_time | - | DEVICE_ID | device_id | - | TYPE_EPISODE_ID | type_episode_id | - | DURATION | duration | - | IS_MAIN_SLEEP | is_main_sleep | - | TYPE | type | - | LEVEL | level | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/fitbit/add_zero_timestamp.py - ``` - - !!! note - `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. - - Fitbit API has two versions for sleep data, v1 and v1.2, we support both. - - ??? 
example "Example of the expected raw data" - - |device_id |type_episode_id |local_date_time |duration |level |is_main_sleep |type | - |------------------------------------ |---------------- |------------------- |--------- |---------- |-------------- |-------------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:36:30 |60 |restless |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:37:30 |660 |asleep |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:48:30 |60 |restless |0 |classic | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |... |... |... |... |... |... | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |1 |2020-10-10 01:46:30 |420 |light |1 |stages | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |1 |2020-10-10 01:53:30 |1230 |deep |1 |stages | - -??? info "FITBIT_STEPS_SUMMARY" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |-----------------|-----------------| - | TIMESTAMP | FLAG_TO_MUTATE | - | DEVICE_ID | device_id | - | LOCAL_DATE_TIME | local_date_time | - | STEPS | steps | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/fitbit/add_zero_timestamp.py - ``` - - !!! note - `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. - - ??? example "Example of the expected raw data" - - |device_id |local_date_time |steps | - |-------------------------------------- |---------------------- |--------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 |1775 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-08 |3201 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-09 |998 | - -??? 
info "FITBIT_STEPS_INTRADAY" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |-----------------|-----------------| - | TIMESTAMP | FLAG_TO_MUTATE | - | DEVICE_ID | device_id | - | LOCAL_DATE_TIME | local_date_time | - | STEPS | steps | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/fitbit/add_zero_timestamp.py - ``` - - !!! note - `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. - - ??? example "Example of the expected raw data" - - |device_id |local_date_time |steps | - |-------------------------------------- |---------------------- |--------- | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:00:00 |5 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:01:00 |3 | - |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:02:00 |0 | - +--8<---- "docs/snippets/parsedfitbit_format.md" diff --git a/docs/snippets/parsedfitbit_format.md b/docs/snippets/parsedfitbit_format.md new file mode 100644 index 00000000..86f2a762 --- /dev/null +++ b/docs/snippets/parsedfitbit_format.md @@ -0,0 +1,233 @@ + +The `format.yaml` maps and transforms columns in your raw data stream to the [mandatory columns RAPIDS needs for Fitbit sensors](../mandatory-fitbit-format). This file is at (replace `fitbitparsed_mysql` with `fitbitparsed_csv` if you use the CSV stream): + +```bash +src/data/streams/fitbitparsed_mysql/format.yaml +``` + +If you want to use this stream with your data, modify every sensor in `format.yaml` to map all columns except `TIMESTAMP` in `[RAPIDS_COLUMN_MAPPINGS]` to your raw data column names. + +All columns are mandatory; however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just keep in mind that some features will be empty if some of these columns are empty. + + +??? 
info "FITBIT_HEARTRATE_SUMMARY" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP| FLAG_TO_MUTATE | + | LOCAL_DATE_TIME | local_date_time | + | DEVICE_ID | device_id | + | HEARTRATE_DAILY_RESTINGHR | heartrate_daily_restinghr | + | HEARTRATE_DAILY_CALORIESOUTOFRANGE | heartrate_daily_caloriesoutofrange | + | HEARTRATE_DAILY_CALORIESFATBURN | heartrate_daily_caloriesfatburn | + | HEARTRATE_DAILY_CALORIESCARDIO | heartrate_daily_caloriescardio | + | HEARTRATE_DAILY_CALORIESPEAK | heartrate_daily_caloriespeak | + + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/fitbit/add_zero_timestamp.py + ``` + + !!! note + `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. + + ??? example "Example of the raw data RAPIDS expects for this data stream" + + |device_id |local_date_time |heartrate_daily_restinghr |heartrate_daily_caloriesoutofrange |heartrate_daily_caloriesfatburn |heartrate_daily_caloriescardio |heartrate_daily_caloriespeak | + |-------------------------------------- |----------------- |------- |-------------- |------------- |------------ |-------| + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 |72 |1200.6102 |760.3020 |15.2048 |0 | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-08 |70 |1100.1120 |660.0012 |23.7088 |0 | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-09 |69 |750.3615 |734.1516 |131.8579 |0 | + +??? 
info "FITBIT_HEARTRATE_INTRADAY" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP| FLAG_TO_MUTATE | + | LOCAL_DATE_TIME | local_date_time | + | DEVICE_ID | device_id | + | HEARTRATE | heartrate | + | HEARTRATE_ZONE | heartrate_zone | + + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/fitbit/add_zero_timestamp.py + ``` + + !!! note + `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. + + ??? example "Example of the raw data RAPIDS expects for this data stream" + + |device_id |local_date_time |heartrate |heartrate_zone | + |-------------------------------------- |---------------------- |--------- |--------------- | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:00:00 |68 |outofrange | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:01:00 |67 |outofrange | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:02:00 |67 |outofrange | + +??? info "FITBIT_SLEEP_SUMMARY" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP| FLAG_TO_MUTATE | + | LOCAL_DATE_TIME| local_date_time | + | LOCAL_START_DATE_TIME| local_start_date_time | + | LOCAL_END_DATE_TIME| local_end_date_time | + | DEVICE_ID| device_id | + | EFFICIENCY| efficiency | + | MINUTES_AFTER_WAKEUP| minutes_after_wakeup | + | MINUTES_ASLEEP| minutes_asleep | + | MINUTES_AWAKE| minutes_awake | + | MINUTES_TO_FALL_ASLEEP| minutes_to_fall_asleep | + | MINUTES_IN_BED| minutes_in_bed | + | IS_MAIN_SLEEP| is_main_sleep | + | TYPE| type | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/fitbit/add_zero_timestamp.py + ``` + + !!! 
note + `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. + + Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1's `count_awake`, `duration_awake`, `count_awakenings`, `count_restless`, and `duration_restless` columns. + + ??? example "Example of the expected raw data" + + |device_id |local_start_date_time |local_end_date_time |efficiency |minutes_after_wakeup |minutes_asleep |minutes_awake |minutes_to_fall_asleep |minutes_in_bed |is_main_sleep |type | + |-------------------------------------- |---------------------- |---------------------- |----------- |--------------------- |--------------- |-------------- |----------------------- |--------------- |-------------- |-------- | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-10 15:36:30 |2020-10-10 16:37:00 |92 |0 |55 |5 |0 |60 |0 |classic | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-10 01:46:30 |2020-10-10 08:10:00 |88 |0 |318 |65 |0 |383 |1 |stages | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-11 00:12:30 |2020-10-11 11:47:00 |89 |1 |562 |132 |0 |694 |1 |stages | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-12 01:31:00 |2020-10-12 09:34:30 |93 |0 |415 |68 |0 |483 |1 |stages | + + +??? info "FITBIT_SLEEP_INTRADAY" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | FLAG_TO_MUTATE | + | LOCAL_DATE_TIME | local_date_time | + | DEVICE_ID | device_id | + | TYPE_EPISODE_ID | type_episode_id | + | DURATION | duration | + | IS_MAIN_SLEEP | is_main_sleep | + | TYPE | type | + | LEVEL | level | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/fitbit/add_zero_timestamp.py + ``` + + !!! 
note + `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. + + Fitbit API has two versions for sleep data, v1 and v1.2, we support both. + + ??? example "Example of the expected raw data" + + |device_id |type_episode_id |local_date_time |duration |level |is_main_sleep |type | + |------------------------------------ |---------------- |------------------- |--------- |---------- |-------------- |-------------- | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:36:30 |60 |restless |0 |classic | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:37:30 |660 |asleep |0 |classic | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |0 |2020-10-10 15:48:30 |60 |restless |0 |classic | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |... |... |... |... |... |... | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |1 |2020-10-10 01:46:30 |420 |light |1 |stages | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |1 |2020-10-10 01:53:30 |1230 |deep |1 |stages | + +??? info "FITBIT_STEPS_SUMMARY" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | FLAG_TO_MUTATE | + | DEVICE_ID | device_id | + | LOCAL_DATE_TIME | local_date_time | + | STEPS | steps | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/fitbit/add_zero_timestamp.py + ``` + + !!! note + `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. + + ??? 
example "Example of the expected raw data" + + |device_id |local_date_time |steps | + |-------------------------------------- |---------------------- |--------- | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 |1775 | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-08 |3201 | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-09 |998 | + +??? info "FITBIT_STEPS_INTRADAY" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | FLAG_TO_MUTATE | + | DEVICE_ID | device_id | + | LOCAL_DATE_TIME | local_date_time | + | STEPS | steps | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/fitbit/add_zero_timestamp.py + ``` + + !!! note + `add_zero_timestamp` adds an all-zero column called `timestamp` that will be filled in later in the pipeline by `readable_time.R` converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. + + ??? 
example "Example of the expected raw data" + + |device_id |local_date_time |steps | + |-------------------------------------- |---------------------- |--------- | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:00:00 |5 | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:01:00 |3 | + |a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |2020-10-07 00:02:00 |0 | + diff --git a/src/data/streams/fitbitjson_csv/container.R b/src/data/streams/fitbitjson_csv/container.R index 2bd8a8cb..79165ef6 100644 --- a/src/data/streams/fitbitjson_csv/container.R +++ b/src/data/streams/fitbitjson_csv/container.R @@ -12,7 +12,7 @@ library(readr) pull_data <- function(stream_parameters, device, sensor, sensor_container, columns){ if(!dir.exists(stream_parameters$FOLDER)) - stop("[FITBIT_DATA_STREAMS][fitbit_json_csv][FOLDER] does not exist: ", stream_parameters$FOLDER) + stop("[FITBIT_DATA_STREAMS][fitbitjson_csv][FOLDER] does not exist: ", stream_parameters$FOLDER) data_file <- file.path(stream_parameters$FOLDER, sensor_container) if(!file.exists(data_file)) diff --git a/src/data/streams/fitbitparsed_csv/container.R b/src/data/streams/fitbitparsed_csv/container.R new file mode 100644 index 00000000..dca02566 --- /dev/null +++ b/src/data/streams/fitbitparsed_csv/container.R @@ -0,0 +1,37 @@ +# if you need a new package, you should add it with renv::install(package) so your renv venv is updated +library(readr) + +#' @description +#' Gets the sensor data for a specific device id from a database table, file or whatever source you want to query +#' +#' @param stream_parameters The PHONE_STREAM_PARAMETERS key in config.yaml. If you need specific parameters add them there. +#' @param device A device ID string +#' @param sensor_container database table or file containing the sensor data for all participants. 
This is the PHONE_SENSOR[TABLE] key in config.yaml +#' @param columns the columns needed from this sensor (we recommend to only return these columns instead of every column in sensor_container) +#' @return A dataframe with the sensor data for device + +pull_data <- function(stream_parameters, device, sensor, sensor_container, columns){ + if(!dir.exists(stream_parameters$FOLDER)) + stop("[FITBIT_DATA_STREAMS][fitbitparsed_csv][FOLDER] does not exist: ", stream_parameters$FOLDER) + data_file <- file.path(stream_parameters$FOLDER, sensor_container) + + if(!file.exists(data_file)) + stop("The data container should be a CSV file but it does not exist: ", data_file) + + if(!endsWith(data_file, ".csv")) + stop("The data container should be a CSV file: ", data_file) + + # Letting the user know what we are doing + message(paste0("Reading this CSV file: ", data_file)) + + sensor_data <- read_csv_chunked(data_file, + callback = DataFrameCallback$new(function(x, pos) subset(x,x[[columns$DEVICE_ID]] == device, select = unlist(columns))), progress = T, chunk_size = 50000) + if(is.null(sensor_data)) # empty file + sensor_data <- read.csv(data_file) + + if(nrow(sensor_data) == 0) + warning("The device '", device,"' did not have data in ", sensor_container) + + return(sensor_data) +} + diff --git a/src/data/streams/fitbitparsed_csv/format.yaml b/src/data/streams/fitbitparsed_csv/format.yaml new file mode 100644 index 00000000..ecaaab11 --- /dev/null +++ b/src/data/streams/fitbitparsed_csv/format.yaml @@ -0,0 +1,83 @@ +FITBIT_HEARTRATE_SUMMARY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: local_date_time + HEARTRATE_DAILY_RESTINGHR: heartrate_daily_restinghr + HEARTRATE_DAILY_CALORIESOUTOFRANGE: heartrate_daily_caloriesoutofrange + HEARTRATE_DAILY_CALORIESFATBURN: heartrate_daily_caloriesfatburn + HEARTRATE_DAILY_CALORIESCARDIO: heartrate_daily_caloriescardio + HEARTRATE_DAILY_CALORIESPEAK: heartrate_daily_caloriespeak + MUTATION: + 
COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/add_zero_timestamp.py + +FITBIT_HEARTRATE_INTRADAY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: local_date_time + HEARTRATE: heartrate + HEARTRATE_ZONE: heartrate_zone + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/add_zero_timestamp.py + +FITBIT_SLEEP_SUMMARY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: local_date_time + LOCAL_START_DATE_TIME: local_start_date_time + LOCAL_END_DATE_TIME: local_end_date_time + EFFICIENCY: efficiency + MINUTES_AFTER_WAKEUP: minutes_after_wakeup + MINUTES_ASLEEP: minutes_asleep + MINUTES_AWAKE: minutes_awake + MINUTES_TO_FALL_ASLEEP: minutes_to_fall_asleep + MINUTES_IN_BED: minutes_in_bed + IS_MAIN_SLEEP: is_main_sleep + TYPE: type + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/add_zero_timestamp.py + +FITBIT_SLEEP_INTRADAY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: local_date_time + TYPE_EPISODE_ID: type_episode_id + DURATION: duration + IS_MAIN_SLEEP: is_main_sleep + TYPE: type + LEVEL: level + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/add_zero_timestamp.py + +FITBIT_STEPS_SUMMARY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: local_date_time + STEPS: steps + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/add_zero_timestamp.py + +FITBIT_STEPS_INTRADAY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: 
local_date_time + STEPS: steps + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/add_zero_timestamp.py diff --git a/src/data/streams/mutations/fitbit/add_zero_timestamp.py b/src/data/streams/mutations/fitbit/add_zero_timestamp.py index 5d4a4490..df84c9b5 100644 --- a/src/data/streams/mutations/fitbit/add_zero_timestamp.py +++ b/src/data/streams/mutations/fitbit/add_zero_timestamp.py @@ -2,5 +2,6 @@ import pandas as pd def main(parsed_data, stream_parameters): parsed_data["timestamp"] = 0 # this column is added at readable_datetime.R because we neeed to take into account multiple timezones - # parsed_data['local_date_time'] = parsed_data['local_date_time'].dt.strftime('%Y-%m-%d %H:%M:%S') + if pd.api.types.is_datetime64_any_dtype( parsed_data['local_date_time']): + parsed_data['local_date_time'] = parsed_data['local_date_time'].dt.strftime('%Y-%m-%d %H:%M:%S') return(parsed_data)