Update minimal workflow example docs

2020-11-04 13:27:58 -05:00 · 2020-11-04 13:27:58 -05:00 · 1ea2730afd
parent 40f7ef4935
commit 1ea2730afd
7 changed files with 162 additions and 49 deletions
--- a/config.yaml
+++ b/config.yaml
@ -1,20 +1,20 @@
-# Participants to include in the analysis
-# You must create a participant file per person manually or automatically (see PARTICIPANT_FILES below)
-PIDS: [test01]
-
-# Use tz codes from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. Double check your code, for example EST is not US Eastern Time.
-TIMEZONE: &timezone
-  America/New_York
-
+# See https://www.rapids.science/setup/configuration/#database-credentials
 DATABASE_GROUP: &database_group
  MY_GROUP

-# To create your participant files from the aware_device table or a csv file change config below and run 'snakemake -j1 create_participants_files'
+# See https://www.rapids.science/setup/configuration/#timezone-of-your-study
+TIMEZONE: &timezone
+  America/New_York
+
+# See https://www.rapids.science/setup/configuration/#participant-files
+PIDS: [j01]
+
+# See https://www.rapids.science/setup/configuration/#automatic-creation-of-participant-files
 CREATE_PARTICIPANT_FILES:
  SOURCE:
    TYPE: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
    DATABASE_GROUP: *database_group
-    CSV_FILE_PATH: "data/external/example_participants.csv" # must have columns: PHONE DEVICE_ID_COLUMN, FITBIT DEVICE_ID_COLUMN, pid , label, start_date, end_date
+    CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format
    TIMEZONE: *timezone
  PHONE_SECTION:
    ADD: TRUE
@ -25,11 +25,13 @@ CREATE_PARTICIPANT_FILES:
    DEVICE_ID_COLUMN: device_id # column name
    IGNORED_DEVICE_IDS: []

+# See https://www.rapids.science/setup/configuration/#day-segments
 DAY_SEGMENTS: &day_segments
-  TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
-  FILE: "data/external/daysegments_periodic.csv"
-  INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, if set to TRUE we consider day segments back enough in the past as to include the first day of data
+  TYPE: EVENT # FREQUENCY, PERIODIC, EVENT
+  FILE: "data/external/daysegments_event.csv"
+  INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs

+# See https://www.rapids.science/setup/configuration/#device-data-source-configuration
 SENSOR_DATA:
  PHONE:
    SOURCE: 
@ -38,7 +40,7 @@ SENSOR_DATA:
      DEVICE_ID_COLUMN: device_id # column name
    TIMEZONE: 
      TYPE: SINGLE # SINGLE or MULTIPLE
-      VALUE: *timezone # IF TYPE=SINGLE, timezone code (e.g. America/New_York, see attribute TIMEZONE above). If TYPE=MULTIPLE, a table in your database with two columns (timestamp, timezone) where timestamp is a unix timestamp and timezone is one of https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
+      VALUE: *timezone # IF TYPE=SINGLE, see docs
  FITBIT:
    SOURCE: 
      TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path)
@ -46,7 +48,7 @@ SENSOR_DATA:
      DEVICE_ID_COLUMN: device_id # column name
    TIMEZONE: 
      TYPE: SINGLE # Fitbit only supports SINGLE timezones
-      VALUE: *timezone # timezone code (e.g. America/New_York, see attribute TIMEZONE above and https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
+      VALUE: *timezone # see docs

 ############## PHONE ###########################################################
 ################################################################################
@ -71,7 +73,7 @@ PHONE_MESSAGES:
  TABLE: messages
  PROVIDERS:
    RAPIDS:
-      COMPUTE: False
+      COMPUTE: True
      MESSAGES_TYPES : [received, sent]
      FEATURES: 
        received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
--- a/data/external/daysegments_event.csv
+++ b/data/external/daysegments_event.csv
@ -1,9 +1,9 @@
 label,event_timestamp,length,shift,shift_direction,device_id
-stress,1587661220000,1hours,0minutes,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
-stress,1587747620000,4hours,4hours,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
-stress,1587906020000,3hours,0minutes,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
-stress,1588003220000,7hours,4hours,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
-stress,1588172420000,9hours,0,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
-mood,1587661220000,1hour,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
-mood,1587747620000,1days,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
-mood,1587906020000,7days,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
+stress,1587661220000,1H,0M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
+stress,1587747620000,4H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
+stress,1587906020000,3H,0M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
+stress,1588003220000,7H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
+stress,1588172420000,9H,0,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
+mood,1587661220000,1H,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
+mood,1587747620000,1D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
+mood,1587906020000,7D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524
--- a/docs/CNAME
+++ b/docs/CNAME
@ -0,0 +1 @@
+www.rapids.science
--- a/docs/setup/configuration.md
+++ b/docs/setup/configuration.md
@ -7,10 +7,10 @@ You need to follow these steps to configure your RAPIDS deployment before you ca
 2. Choose the [timezone of your study](#timezone-of-your-study)
 3. Create your [participants files](#participant-files)
 4. Select what [day segments](#day-segments) you want to extract features on
-5. Modify your [device data configuration](#device-data-configuration)
+5. Modify your [device data source configuration](#device-data-source-configuration)
 6. Select what [sensors and features](#sensor-and-features-to-process) you want to process

-When you are done with this initial configuration, go to [executing RAPIDS]().
+When you are done with this initial configuration, go to [executing RAPIDS](/setup/execution).

 !!! hint
    Every time you see `config["KEY"]` or `[KEY]` in these docs we are referring to the corresponding key in the `config.yaml` file.
@ -61,23 +61,16 @@ Support coming soon.

 ## Participant files

-Participant files link together multiple devices (smartphones and wearables) to specific participants and identify them throughout RAPIDS. You can create these files manually or [automatically](#automatic-creation-of-participant-files). Participant files are stored in `data/external/participant_files/pxx.yaml` and follow a unified structure: 
+Participant files link together multiple devices (smartphones and wearables) to specific participants and identify them throughout RAPIDS. You can create these files manually or [automatically](#automatic-creation-of-participant-files). Participant files are stored in `data/external/participant_files/pxx.yaml` and follow a unified [structure](#structure-of-participants-files).

-```yaml
-# This is the content of a participant file (data/external/participant_files/pxx.yaml)
-PHONE:
-  DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524, dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43]
-  PLATFORMS: [android,ios]
-  LABEL: test01
-  START_DATE: 2020-04-23
-  END_DATE: 2020-10-28
-FITBIT:
-  DEVICE_IDS: [fitbit1]
-  LABEL: test01
-  START_DATE: 2020-04-23
-  END_DATE: 2020-10-28
+!!! note
+    The list `PIDS` in `config.yaml` needs to contain the participant file names (without the `.yaml` extension) of the people you want to process. For example, if you created `p01.yaml`, `p02.yaml` and `p03.yaml` files in `data/external/participant_files/`, then `PIDS` should be:
+    ```yaml
+    PIDS: [p01, p02, p03] 
+    ```

-```
+!!! tip
+    Attribute *values* of the `[PHONE]` and `[FITBIT]` sections in every participant file are optional, which allows you to analyze data from participants that only carried smartphones, only Fitbit devices, or both.

 ??? hint "Optional: Migrating participants files with the old format"
    If you were using the pre-release version of RAPIDS with participant files in plain text (as opposed to yaml), you can run the following command and your old files will be converted into yaml files stored in `data/external/participant_files/`
@ -85,15 +78,30 @@ FITBIT:
    ```bash
    python tools/update_format_participant_files.py
    ```
-  
-!!! tip
-    Attributes of the `[PHONE]` and `[FITBIT]` sections are optional which allows you to analyze data from participants that only carried smartphones, only Fitbit devices, or both.

 ### Structure of participants files

+!!! example "Example of the structure of a participant file"
+
+    In this example, the participant used an Android phone, an iOS phone, and a Fitbit device throughout the study between Apr 23rd 2020 and Oct 28th 2020.
+
+    ```yaml
+    PHONE:
+      DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524, dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43]
+      PLATFORMS: [android,ios]
+      LABEL: test01
+      START_DATE: 2020-04-23
+      END_DATE: 2020-10-28
+    FITBIT:
+      DEVICE_IDS: [fitbit1]
+      LABEL: test01
+      START_DATE: 2020-04-23
+      END_DATE: 2020-10-28
+    ```
+
 **For `[PHONE]`**

-| Key            | Description                                                                                                                                                                                                                                                                                                                                |
+| Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;            | Description                                                                                                                                                                                                                                                                                                                                |
 |-------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | `[DEVICE_IDS]` | An array of the strings that uniquely identify each smartphone, you can have more than one for when participants changed phones in the middle of the study, in this case, data from all their devices will be joined and relabeled with the last 1 on this list.                                                                           |
 | `[PLATFORMS]`  | An array that specifies the OS of each smartphone in  `[DEVICE_IDS]` , use a combination of  `android`  or  `ios`  (we support participants that changed platforms in the middle of your study!). If you have an  `aware_device`  table in your database you can set  `[PLATFORMS]: [multiple]`  and RAPIDS will infer them automatically. |
@ -103,7 +111,7 @@ FITBIT:

 **For `[FITBIT]`**

-| Key              | Description                                                                                               |
+| Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;              | Description                                                                                               |
 |------------------|-----------------------------------------------------------------------------------------------------------|
 | `[DEVICE_IDS]`   | An array of the strings that uniquely identify each Fitbit, you can have more than one in case the participant changed devices in the middle of the study, in this case, data from all devices will be joined and relabeled with the last  `device_id`  on this list. |
 | `[LABEL]`        | A string that is used in reports and visualizations.                                                                                                                                                                                                                  |
@ -306,13 +314,15 @@ Day segments (or epochs) are the time windows on which you want to extract behav
 ### Segment Examples

 --- 
-## Device Data Configuration
+## Device Data Source Configuration

 You might need to modify the following config keys in your `config.yaml` depending on what devices your participants used and where you are storing your data.

 !!! hint
    You can ignore `[SENSOR_DATA][PHONE]` or `[SENSOR_DATA][FITBIT]` if you are not working with either devices.

+The relevant `config.yaml` section looks as follows by default:
+
 ```yaml
 SENSOR_DATA:
  PHONE:
@ -377,7 +387,7 @@ PHONE_MESSAGES:
      SRC_FOLDER: "rapids" # inside src/features/phone_messages
 ```

-| Key                           | Description                                                                                                                                                                                                                                                                                                                     |
+| Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;                           | Description                                                                                                                                                                                                                                                                                                                     |
 |-------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | `[TABLE]`                     | The name of the table in your database that stores this sensor data.                                                                                                                                                                                                                                                            |
 | `[PROVIDERS]`                 | A collection of  `providers` . A provider is an author or group of authors that created specific features for the sensor at hand. The provider for all the features implemented by our team is called  `RAPIDS`  but we have also included contributions from other researchers (for example  `DORYAB`  for location features). |
--- a/docs/setup/execution.md
+++ b/docs/setup/execution.md
@ -0,0 +1,17 @@
+# Execution
+
+After you have [installed](/setup/installation) and [configured](/setup/configuration) RAPIDS, use the following command to execute it.
+
+```bash
+./rapids -j1
+```
+
+!!! info
+    The script `#!bash ./rapids` is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. `-j1`). 
+    
+    Any changes to the `config.yaml` file will be applied automatically and only the relevant files will be updated.
+
+!!! hint "Multi-core"
+    You can run RAPIDS over multiple cores by modifying the `-j` argument (e.g. use `-j8` to use 8 cores). **However**, take into account that this means multiple sensor datasets for different participants will be loaded in memory at the same time. If RAPIDS crashes because it ran out of memory, reduce the number of cores and try again.
+
+    As a reference, we have run RAPIDS over 12 cores and 32 GB of RAM without problems for a study with 200 participants with 14 days of low-frequency smartphone data (no accelerometer, gyroscope, or magnetometer).
--- a/docs/workflow-examples/minimal.md
+++ b/docs/workflow-examples/minimal.md
@ -0,0 +1,80 @@
+Minimal Working Example
+=======================
+
+This is a quick guide for creating and running a simple pipeline to extract missed, outgoing, and incoming call features for `daily` and `night` epochs of one participant monitored on the US East coast.
+
+1. Install RAPIDS and make sure your `conda` environment is active (see [Installation](/setup/installation))
+2. For the [Initial Configuration](/setup/configuration) steps do the following and use the example as a guide:
+    
+    !!! info "Things to change on each configuration step"
+        1\. Set up your database connection credentials in `.env`. We assume your credentials group is called `MY_GROUP`.
+
+        2\. Set `America/New_York` as the timezone
+
+        3\. Create a participant file `p01.yaml` based on one of your participants and add `p01` to `[PIDS]` in `config.yaml`
+        
+        4\. Set `[DAY_SEGMENTS][TYPE]` to `PERIODIC` and `FILE` to a file containing the following lines:
+             ```csv
+             label,start_time,length,repeats_on,repeats_value
+             daily,00:00:00,23H 59M 59S,every_day,0
+             night,00:00:00,5H 59M 59S,every_day,0
+             ```
+
+         5\. If you collected data with AWARE you won't need to modify the attributes of `[SENSOR_DATA][PHONE]`
+
+         6\. Set `[PHONE_CALLS][PROVIDERS][RAPIDS][COMPUTE]` to `True`
+
+
+    !!! example "Example of the `config.yaml` sections after the changes outlined above"
+        ```
+        PIDS: [p01]
+
+        TIMEZONE: &timezone
+            America/New_York
+
+        DATABASE_GROUP: &database_group
+            MY_GROUP
+
+        # ... other irrelevant sections
+
+        DAY_SEGMENTS: &day_segments
+            TYPE: PERIODIC
+            FILE: "data/external/daysegments_periodic.csv" # make sure the three lines specified above are in the file
+            INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE
+
+        # No need to change this if you collected AWARE data on a database and your credentials are grouped under `MY_GROUP` in `.env`
+        SENSOR_DATA:
+            PHONE:
+                SOURCE: 
+                    TYPE: DATABASE
+                    DATABASE_GROUP: *database_group
+                    DEVICE_ID_COLUMN: device_id # column name
+                TIMEZONE: 
+                    TYPE: SINGLE # SINGLE or MULTIPLE
+                    VALUE: *timezone 
+
+
+        ############## PHONE ###########################################################
+        ################################################################################
+
+        ....
+
+        # Communication call features config, TYPES and FEATURES keys need to match
+        PHONE_CALLS:
+            TABLE: calls # change if your calls table has a different name
+            PROVIDERS:
+                RAPIDS:
+                    COMPUTE: True # set this to True!
+                    CALL_TYPES: ...
+        ```
+
+3. Run RAPIDS
+    ```bash
+    ./rapids -j1
+    ```
+4. The call features for daily and night day segments will be in
+   ```
+   /data/processed/features/p01/phone_calls.csv
+   ```
+
+
--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -48,4 +48,7 @@ pages:
  - Home: 'index.md'
  - Setup:
    - Installation: 'setup/installation.md'
-    - Initial Configuration: setup/configuration.md
+    - Initial Configuration: setup/configuration.md
+    - Execution: setup/execution.md
+  - Example Workflows:
+    - Minimal: workflow-examples/minimal.md