Update minimal workflow example docs

pull/103/head
JulioV 2020-11-04 13:27:58 -05:00
parent 40f7ef4935
commit 1ea2730afd
7 changed files with 162 additions and 49 deletions

View File

@ -1,20 +1,20 @@
# Participants to include in the analysis
# You must create a participant file per person manually or automatically (see PARTICIPANT_FILES below)
PIDS: [test01]
# Use tz codes from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. Double check your code, for example EST is not US Eastern Time.
TIMEZONE: &timezone
America/New_York
# See https://www.rapids.science/setup/configuration/#database-credentials
DATABASE_GROUP: &database_group
MY_GROUP
# To create your participant files from the aware_device table or a csv file change config below and run 'snakemake -j1 create_participants_files'
# See https://www.rapids.science/setup/configuration/#timezone-of-your-study
TIMEZONE: &timezone
America/New_York
# See https://www.rapids.science/setup/configuration/#participant-files
PIDS: [j01]
# See https://www.rapids.science/setup/configuration/#automatic-creation-of-participant-files
CREATE_PARTICIPANT_FILES:
SOURCE:
TYPE: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
DATABASE_GROUP: *database_group
CSV_FILE_PATH: "data/external/example_participants.csv" # must have columns: PHONE DEVICE_ID_COLUMN, FITBIT DEVICE_ID_COLUMN, pid , label, start_date, end_date
CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format
TIMEZONE: *timezone
PHONE_SECTION:
ADD: TRUE
@ -25,11 +25,13 @@ CREATE_PARTICIPANT_FILES:
DEVICE_ID_COLUMN: device_id # column name
IGNORED_DEVICE_IDS: []
# See https://www.rapids.science/setup/configuration/#day-segments
DAY_SEGMENTS: &day_segments
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
FILE: "data/external/daysegments_periodic.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, if set to TRUE we consider day segments back enough in the past as to include the first day of data
TYPE: EVENT # FREQUENCY, PERIODIC, EVENT
FILE: "data/external/daysegments_event.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
# See https://www.rapids.science/setup/configuration/#device-data-source-configuration
SENSOR_DATA:
PHONE:
SOURCE:
@ -38,7 +40,7 @@ SENSOR_DATA:
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # SINGLE or MULTIPLE
VALUE: *timezone # IF TYPE=SINGLE, timezone code (e.g. America/New_York, see attribute TIMEZONE above). If TYPE=MULTIPLE, a table in your database with two columns (timestamp, timezone) where timestamp is a unix timestamp and timezone is one of https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
VALUE: *timezone # IF TYPE=SINGLE, see docs
FITBIT:
SOURCE:
TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path)
@ -46,7 +48,7 @@ SENSOR_DATA:
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # Fitbit only supports SINGLE timezones
VALUE: *timezone # timezone code (e.g. America/New_York, see attribute TIMEZONE above and https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
VALUE: *timezone # see docs
############## PHONE ###########################################################
################################################################################
@ -71,7 +73,7 @@ PHONE_MESSAGES:
TABLE: messages
PROVIDERS:
RAPIDS:
COMPUTE: False
COMPUTE: True
MESSAGES_TYPES : [received, sent]
FEATURES:
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]

View File

@ -1,9 +1,9 @@
label,event_timestamp,length,shift,shift_direction,device_id
stress,1587661220000,1hours,0minutes,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
stress,1587747620000,4hours,4hours,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
stress,1587906020000,3hours,0minutes,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
stress,1588003220000,7hours,4hours,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
stress,1588172420000,9hours,0,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
mood,1587661220000,1hour,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
mood,1587747620000,1days,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
mood,1587906020000,7days,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
stress,1587661220000,1H,0M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
stress,1587747620000,4H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
stress,1587906020000,3H,0M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
stress,1588003220000,7H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
stress,1588172420000,9H,0,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
mood,1587661220000,1H,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
mood,1587747620000,1D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
mood,1587906020000,7D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524

1 label event_timestamp length shift shift_direction device_id
2 stress 1587661220000 1hours 1H 0minutes 0M 1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524
3 stress 1587747620000 4hours 4H 4hours 4H -1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524
4 stress 1587906020000 3hours 3H 0minutes 0M 1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524
5 stress 1588003220000 7hours 7H 4hours 4H -1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524
6 stress 1588172420000 9hours 9H 0 -1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524
7 mood 1587661220000 1hour 1H 0 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524
8 mood 1587747620000 1days 1D 0 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524
9 mood 1587906020000 7days 7D 0 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524

1
docs/CNAME 100644
View File

@ -0,0 +1 @@
www.rapids.science

View File

@ -7,10 +7,10 @@ You need to follow these steps to configure your RAPIDS deployment before you ca
2. Choose the [timezone of your study](#timezone-of-your-study)
3. Create your [participants files](#participant-files)
4. Select what [day segments](#day-segments) you want to extract features on
5. Modify your [device data configuration](#device-data-configuration)
5. Modify your [device data source configuration](#device-data-source-configuration)
6. Select what [sensors and features](#sensor-and-features-to-process) you want to process
When you are done with this initial configuration, go to [executing RAPIDS]().
When you are done with this initial configuration, go to [executing RAPIDS](/setup/execution).
!!! hint
Every time you see `config["KEY"]` or `[KEY]` in these docs we are referring to the corresponding key in the `config.yaml` file.
@ -61,23 +61,16 @@ Support coming soon.
## Participant files
Participant files link together multiple devices (smartphones and wearables) to specific participants and identify them throughout RAPIDS. You can create these files manually or [automatically](#automatic-creation-of-participant-files). Participant files are stored in `data/external/participant_files/pxx.yaml` and follow a unified structure:
Participant files link together multiple devices (smartphones and wearables) to specific participants and identify them throughout RAPIDS. You can create these files manually or [automatically](#automatic-creation-of-participant-files). Participant files are stored in `data/external/participant_files/pxx.yaml` and follow a unified [structure](#structure-of-participants-files).
```yaml
# This is the content of a participant file (data/external/participant_files/pxx.yaml)
PHONE:
DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524, dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43]
PLATFORMS: [android,ios]
LABEL: test01
START_DATE: 2020-04-23
END_DATE: 2020-10-28
FITBIT:
DEVICE_IDS: [fitbit1]
LABEL: test01
START_DATE: 2020-04-23
END_DATE: 2020-10-28
!!! note
The list `PIDS` in `config.yaml` needs to have the participant file names of the people you want to process. For example, if you created `p01.yaml`, `p02.yaml` and `p03.yaml` files in `data/external/participant_files/`, then `PIDS` should be:
```yaml
PIDS: [p01, p02, p03]
```
```
!!! tip
Attribute *values* of the `[PHONE]` and `[FITBIT]` sections in every participant file are optional which allows you to analyze data from participants that only carried smartphones, only Fitbit devices, or both.
??? hint "Optional: Migrating participants files with the old format"
If you were using the pre-release version of RAPIDS with participant files in plain text (as opposed to yaml), you can run the following command and your old files will be converted into yaml files stored in `data/external/participant_files/`
@ -86,14 +79,29 @@ FITBIT:
python tools/update_format_participant_files.py
```
!!! tip
Attributes of the `[PHONE]` and `[FITBIT]` sections are optional which allows you to analyze data from participants that only carried smartphones, only Fitbit devices, or both.
### Structure of participants files
!!! example "Example of the structure of a participant file"
In this example, the participant used an Android phone, an iOS phone, and a Fitbit device throughout the study between Apr 23rd 2020 and Oct 28th 2020.
```yaml
PHONE:
DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524, dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43]
PLATFORMS: [android,ios]
LABEL: test01
START_DATE: 2020-04-23
END_DATE: 2020-10-28
FITBIT:
DEVICE_IDS: [fitbit1]
LABEL: test01
START_DATE: 2020-04-23
END_DATE: 2020-10-28
```
**For `[PHONE]`**
| Key | Description |
| Key                      | Description |
|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `[DEVICE_IDS]` | An array of the strings that uniquely identify each smartphone, you can have more than one for when participants changed phones in the middle of the study, in this case, data from all their devices will be joined and relabeled with the last `device_id` on this list. |
| `[PLATFORMS]` | An array that specifies the OS of each smartphone in `[DEVICE_IDS]` , use a combination of `android` or `ios` (we support participants that changed platforms in the middle of your study!). If you have an `aware_device` table in your database you can set `[PLATFORMS]: [multiple]` and RAPIDS will infer them automatically. |
@ -103,7 +111,7 @@ FITBIT:
**For `[FITBIT]`**
| Key | Description |
| Key                      | Description |
|------------------|-----------------------------------------------------------------------------------------------------------|
| `[DEVICE_IDS]` | An array of the strings that uniquely identify each Fitbit, you can have more than one in case the participant changed devices in the middle of the study, in this case, data from all devices will be joined and relabeled with the last `device_id` on this list. |
| `[LABEL]` | A string that is used in reports and visualizations. |
@ -306,13 +314,15 @@ Day segments (or epochs) are the time windows on which you want to extract behav
### Segment Examples
---
## Device Data Configuration
## Device Data Source Configuration
You might need to modify the following config keys in your `config.yaml` depending on what devices your participants used and where you are storing your data.
!!! hint
You can ignore `[SENSOR_DATA][PHONE]` or `[SENSOR_DATA][FITBIT]` if you are not working with either device.
The relevant `config.yaml` section looks as follows by default:
```yaml
SENSOR_DATA:
PHONE:
@ -377,7 +387,7 @@ PHONE_MESSAGES:
SRC_FOLDER: "rapids" # inside src/features/phone_messages
```
| Key | Description |
| Key                      | Description |
|-------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `[TABLE]` | The name of the table in your database that stores this sensor data. |
| `[PROVIDERS]` | A collection of `providers` . A provider is an author or group of authors that created specific features for the sensor at hand. The provider for all the features implemented by our team is called `RAPIDS` but we have also included contributions from other researchers (for example `DORYAB` for location features). |

View File

@ -0,0 +1,17 @@
# Execution
After you have [installed](/setup/installation) and [configured](/setup/configuration) RAPIDS, use the following command to execute it.
```bash
./rapids -j1
```
!!! info
The script `#!bash ./rapids` is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. `-j1`).
Any changes to the `config.yaml` file will be applied automatically and only the relevant files will be updated.
!!! hint "Multi-core"
You can run RAPIDS over multiple cores by modifying the `-j` argument (e.g. use `-j8` to use 8 cores). **However**, take into account that this means multiple sensor datasets for different participants will be loaded in memory at the same time. If RAPIDS crashes because it ran out of memory, reduce the number of cores and try again.
As a reference, we have run RAPIDS over 12 cores and 32 GB of RAM without problems for a study with 200 participants with 14 days of low-frequency smartphone data (no accelerometer, gyroscope, or magnetometer).

View File

@ -0,0 +1,80 @@
Minimal Working Example
=======================
This is a quick guide for creating and running a simple pipeline to extract missed, outgoing, and incoming call features for `daily` and `night` epochs of one participant monitored on the US East coast.
1. Install RAPIDS and make sure your `conda` environment is active (see [Installation](/setup/installation))
2. For the [Initial Configuration](/setup/configuration) steps do the following and use the example as a guide:
!!! info "Things to change on each configuration step"
1\. Setup your database connection credentials in `.env`. We assume your credentials group is called `MY_GROUP`.
2\. Set `America/New_York` as the timezone
3\. Create a participant file `p01.yaml` based on one of your participants and add `p01` to `[PIDS]` in `config.yaml`
4\. Set `[DAY_SEGMENTS][TYPE]` to `PERIODIC` and `[DAY_SEGMENTS][FILE]` to the path of a file containing the following lines:
```csv
label,start_time,length,repeats_on,repeats_value
daily,00:00:00,23H 59M 59S,every_day,0
night,00:00:00,5H 59M 59S,every_day,0
```
5\. If you collected data with AWARE you won't need to modify the attributes of `[SENSOR_DATA][PHONE]`
6\. Set `[PHONE_CALLS][PROVIDERS][RAPIDS][COMPUTE]` to `True`
!!! example "Example of the `config.yaml` sections after the changes outlined above"
```
PIDS: [p01]
TIMEZONE: &timezone
America/New_York
DATABASE_GROUP: &database_group
MY_GROUP
# ... other irrelevant sections
DAY_SEGMENTS: &day_segments
TYPE: PERIODIC
FILE: "data/external/daysegments_periodic.csv" # make sure the three lines specified above are in the file
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE
# No need to change this if you collected AWARE data on a database and your credentials are grouped under `MY_GROUP` in `.env`
SENSOR_DATA:
PHONE:
SOURCE:
TYPE: DATABASE
DATABASE_GROUP: *database_group
DEVICE_ID_COLUMN: device_id # column name
TIMEZONE:
TYPE: SINGLE # SINGLE or MULTIPLE
VALUE: *timezone
############## PHONE ###########################################################
################################################################################
....
# Communication call features config, TYPES and FEATURES keys need to match
PHONE_CALLS:
TABLE: calls # change if your calls table has a different name
PROVIDERS:
RAPIDS:
COMPUTE: True # set this to True!
CALL_TYPES: ...
```
3. Run RAPIDS
```bash
./rapids -j1
```
4. The call features for the `daily` and `night` day segments will be in
```
data/processed/features/p01/phone_calls.csv
```

View File

@ -49,3 +49,6 @@ pages:
- Setup:
- Installation: 'setup/installation.md'
- Initial Configuration: setup/configuration.md
- Execution: setup/execution.md
- Example Workflows:
- Minimal: workflow-examples/minimal.md