From 3ee8199574aeaec926d76b02308e1a435a719573 Mon Sep 17 00:00:00 2001 From: JulioV Date: Sun, 14 Mar 2021 11:40:04 -0400 Subject: [PATCH] Update docs --- config.yaml | 4 +- docs/change-log.md | 2 +- docs/features/add-new-features.md | 106 +++++++----------- docs/features/feature-introduction.md | 1 + docs/index.md | 8 +- docs/migrating-from-old-versions.md | 4 +- .../{where-do-i-start.md => overview.md} | 19 ++-- docs/workflow-examples/minimal.md | 6 +- mkdocs.yml | 2 +- 9 files changed, 64 insertions(+), 88 deletions(-) rename docs/setup/{where-do-i-start.md => overview.md} (90%) diff --git a/config.yaml b/config.yaml index 169fc106..c91a0aea 100644 --- a/config.yaml +++ b/config.yaml @@ -44,7 +44,7 @@ TIMEZONE: # PHONE # ######################################################################################################################## -# See https://www.rapids.science/latest/setup/configuration/#device-data-source-configuration +# See https://www.rapids.science/latest/setup/configuration/#data-stream-configuration PHONE_DATA_STREAMS: USE: aware_mysql @@ -329,7 +329,7 @@ PHONE_WIFI_VISIBLE: # FITBIT # ######################################################################################################################## -# See https://www.rapids.science/latest/setup/configuration/#device-data-source-configuration +# See https://www.rapids.science/latest/setup/configuration/#data-stream-configuration FITBIT_DATA_STREAMS: USE: fitbitjson_mysql diff --git a/docs/change-log.md b/docs/change-log.md index 3b103fe8..fa662385 100644 --- a/docs/change-log.md +++ b/docs/change-log.md @@ -1,7 +1,7 @@ # Change Log ## v1.0.0 -- Add a new [Where do I start?](../setup/where-do-i-start/) page. +- Add a new [Overview](../setup/overview/) page. - You can [extend](../datastreams/add-new-data-streams/) RAPIDS with your own [data streams](../datastreams/data-streams-introduction/). Data streams are data collected with other sensing apps besides AWARE (like Beiwe, mindLAMP), and stored in other data containers (databases, files) besides MySQL. - Support to analyze Empatica wearable data (thanks to Joe Kim and Brinnae Bent from the [DBDP](https://dbdp.org/)) - Support to analyze AWARE data stored in [CSV files](../datastreams/aware-csv/) and InfluxDB databases (the latter thanks to Neil Singh) diff --git a/docs/features/add-new-features.md b/docs/features/add-new-features.md index 6ea0c20f..94321739 100644 --- a/docs/features/add-new-features.md +++ b/docs/features/add-new-features.md @@ -3,17 +3,17 @@ !!! hint - We recommend reading the [Behavioral Features Introduction](../feature-introduction/) before reading this page. - You can implement new features in Python or R scripts. - - You won't have to deal with time zones, dates, times, data cleaning or preprocessing. The data that RAPIDS pipes to your feature extraction code is ready to process. + - You won't have to deal with time zones, dates, times, data cleaning, or preprocessing. The data that RAPIDS pipes to your feature extraction code are ready to process. ## New Features for Existing Sensors You can add new features to any existing sensors (see list below) by adding a new provider in three steps: 1. [Modify](#modify-the-configyaml-file) the `config.yaml` file -2. [Create](#create-a-provider-folder-script-and-function) a provider folder, script and function +2. [Create](#create-a-feature-provider-script) your feature provider script 3. 
[Implement](#implement-your-feature-extraction-code) your feature extraction code
 
-As a tutorial, we will add a new provider for `PHONE_ACCELEROMETER` called `VEGA` that extracts `feature1`, `feature2`, `feature3` in Python and that it requires a parameter from the user called `MY_PARAMETER`.
+As a tutorial, we will add a new provider for `PHONE_ACCELEROMETER` called `VEGA` that extracts `feature1`, `feature2`, `feature3` with a Python script that requires a parameter from the user called `MY_PARAMETER`.
 
 ??? info "Existing Sensors"
     An existing sensor of any device with a configuration entry in `config.yaml`:
@@ -62,21 +62,22 @@ As a tutorial, we will add a new provider for `PHONE_ACCELEROMETER` called `VEGA
 
 ### Modify the `config.yaml` file
 
-In this step you need to add your provider configuration section under the relevant sensor in `config.yaml`. See our example for our tutorial's `VEGA` provider for `PHONE_ACCELEROMETER`:
+In this step, you need to add your provider configuration section under the relevant sensor in `config.yaml`. See our example for our tutorial's `VEGA` provider for `PHONE_ACCELEROMETER`:
 
 ??? example "Example configuration for a new accelerometer provider `VEGA`"
-    ```yaml
+    ```yaml hl_lines="12 13 14 15 16 17"
     PHONE_ACCELEROMETER:
         CONTAINER: accelerometer
         PROVIDERS:
-            RAPIDS:
+            RAPIDS: # this is a feature provider
             COMPUTE: False
             ...
-            PANDA:
+            PANDA: # this is another feature provider
             COMPUTE: False
             ...
-            VEGA:
+
+            VEGA: # this is our new feature provider
             COMPUTE: False
             FEATURES: ["feature1", "feature2", "feature3"]
-            MY_PARAMTER: a_string
+            MY_PARAMETER: a_string
@@ -89,31 +90,34 @@ In this step you need to add your provider configuration section under the relev
 |---|---|
 |`[COMPUTE]`| Flag to activate/deactivate your provider
 |`[FEATURES]`| List of features your provider supports. Your provider code should only return the features on this list
-|`[MY_PARAMTER]`| An arbitrary parameter that our example provider `VEGA` needs. This can be a boolean, integer, float, string or an array of any of such types.
+|`[MY_PARAMETER]`| An arbitrary parameter that our example provider `VEGA` needs. This can be a boolean, integer, float, string, or an array of any of these types.
 |`[SRC_LANGUAGE]`| The programming language of your provider script, it can be `python` or `r`, in our example `python`
 |`[SRC_FOLDER]`| The name of your provider in lower case, in our example `vega` (this will be the name of your folder in the next step)
 
-### Create a provider folder, script and function
+### Create a feature provider script
 
-In this step you need to add a folder, script and function for your provider.
-
-5. Create your provider **folder** under `src/feature/DEVICE_SENSOR/YOUR_PROVIDER`, in our example `src/feature/phone_accelerometer/vega` (same as `[SRC_FOLDER]` in the step above).
-6. Create your provider **script** inside your provider folder, it can be a Python file called `main.py` or an R file called `main.R`.
-7. Add your provider **function** in your provider script. The name of such function should be `[providername]_features`, in our example `vega_features`
-
-    !!! info "Python function"
-        ```python
-        def [providername]_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
-        ```
-
-    !!! info "R function"
-        ```r
-        [providername]_features <- function(sensor_data, time_segment, provider)
-        ```
+Create your feature Python or R script called `main.py` or `main.R` in the correct folder, `src/feature/[sensorname]/[providername]/`. 
RAPIDS automatically loads and executes it based on the config section you added in the last step. For our example, this script is: +```bash +src/feature/phone_accelerometer/vega/main.py +``` ### Implement your feature extraction code +Every feature script (`main.[py|R]`) needs a `[providername]_features` function with specific parameters. RAPIDS calls this function with the sensor data ready to process and with other functions and arguments you will need. -The provider function that you created in the step above will receive the following parameters: +=== "Python function" + ```python + def [providername]_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): + # empty for now + return(your_features_df) + ``` + +=== "R function" + ```r + [providername]_features <- function(sensor_data, time_segment, provider){ + # empty for now + return(your_features_df) + } + ``` | Parameter                                       | Description |---|---| @@ -125,32 +129,32 @@ The provider function that you created in the step above will receive the follow |`**kwargs`| Python only. Not used for now -The code to extract your behavioral features should be implemented in your provider function and in general terms it will have three stages: +The next step is to implement the code that computes your behavioral features in your provider script's function. As with any other script, this function can call other auxiliary methods, but in general terms, it should have three stages: ??? info "1. Read a participant's data by loading the CSV data stored in the file pointed by `sensor_data_files`" ``` python acc_data = pd.read_csv(sensor_data_files["sensor_data"]) ``` - Note that phone's battery, screen, and activity recognition data is given as episodes instead of event rows (for example, start and end timestamps of the periods the phone screen was on) + Note that the phone's battery, screen, and activity recognition data are given as episodes instead of event rows (for example, start and end timestamps of the periods the phone screen was on) ??? info "2. Filter your data to process only those rows that belong to `time_segment`" - This step is only one line of code, but to undersand why we need it, keep reading. + This step is only one line of code, but keep reading to understand why we need it. ```python acc_data = filter_data_by_segment(acc_data, time_segment) ``` You should use the `filter_data_by_segment()` function to process and group those rows that belong to each of the [time segments RAPIDS could be configured with](../../setup/configuration/#time-segments). - Let's understand the `filter_data_by_segment()` function with an example. A RAPIDS user can extract features on any arbitrary [time segment](../../setup/configuration/#time-segments). A time segment is a period of time that has a label and one or more instances. For example, the user (or you) could have requested features on a daily, weekly, and week-end basis for `p01`. The labels are arbritrary and the instances depend on the days a participant was monitored for: + Let's understand the `filter_data_by_segment()` function with an example. A RAPIDS user can extract features on any arbitrary [time segment](../../setup/configuration/#time-segments). A time segment is a period that has a label and one or more instances. For example, the user (or you) could have requested features on a daily, weekly, and weekend basis for `p01`. 
The labels are arbitrary, and the instances depend on the days a participant was monitored for:

    - the daily segment could be named `my_days` and if `p01` was monitored for 14 days, it would have 14 instances
    - the weekly segment could be named `my_weeks` and if `p01` was monitored for 14 days, it would have 2 instances.
    - the weekend segment could be named `my_weekends` and if `p01` was monitored for 14 days, it would have 2 instances.

    For this example, RAPIDS will call your provider function three times for `p01`, once where `time_segment` is `my_days`, once where `time_segment` is `my_weeks`, and once where `time_segment` is `my_weekends`. In this example, not every row in `p01`'s data needs to take part in the feature computation for either segment **and** the rows need to be grouped differently.

    Thus `filter_data_by_segment()` comes in handy: it will return a data frame that contains the rows that were logged during a time segment plus an extra column called `local_segment`. This new column will have as many unique values as time segment instances exist (14, 2, and 2 for our `p01`'s `my_days`, `my_weeks`, and `my_weekends` examples). After filtering, **you should group the data frame by this column and compute any desired features**, for example:

    ```python
    acc_features["maxmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].max()
    ```

    The reason RAPIDS does not filter the participant's data set for you is because your code might need to compute something based on a participant's complete dataset before computing their features. For example, you might want to identify the number that called a participant the most throughout the study before computing a feature with the number of calls the participant received from that number.

??? info "3. Return a data frame with your features"
    After filtering, grouping your data, and computing your features, your provider function should return a data frame that has:

    - One row per time segment instance (e.g., 14 in our `p01`'s `my_days` example)
    - The `local_segment` column added by `filter_data_by_segment()`
-    - One column per feature. By convention the name of your features should only contain letters or numbers (`feature1`). RAPIDS will automatically add the right sensor and provider prefix (`phone_accelerometr_vega_`)
+    - One column per feature. The name of your features should only contain letters or numbers (`feature1`) by convention. RAPIDS automatically adds the correct sensor and provider prefix; in our example, this prefix is `phone_accelerometer_vega_`.
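
+??? example "A minimal `VEGA` provider sketch"
+    This is a hypothetical sketch of what our tutorial's `vega_features()` function could look like, assuming `feature1`, `feature2`, and `feature3` stand for the maximum, minimum, and mean acceleration magnitude per time segment; `MY_PARAMETER` is read but not used here, and the `double_values_*` columns follow the AWARE accelerometer format used in the existing `RAPIDS` provider. Adapt the logic to your own features.
+
+    ```python
+    import pandas as pd
+    import numpy as np
+
+    def vega_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
+        acc_data = pd.read_csv(sensor_data_files["sensor_data"])
+        requested_features = provider["FEATURES"]   # ["feature1", "feature2", "feature3"]
+        my_parameter = provider["MY_PARAMETER"]     # our arbitrary example parameter, unused in this sketch
+
+        # return an empty data frame with the expected columns if there is no data to process
+        acc_features = pd.DataFrame(columns=["local_segment"] + requested_features)
+
+        if not acc_data.empty:
+            # keep only the rows that belong to this time segment and group them by instance
+            acc_data = filter_data_by_segment(acc_data, time_segment)
+
+            if not acc_data.empty:
+                # magnitude = sqrt(x^2 + y^2 + z^2)
+                acc_data["magnitude"] = np.sqrt(acc_data["double_values_0"] ** 2 +
+                                                acc_data["double_values_1"] ** 2 +
+                                                acc_data["double_values_2"] ** 2)
+                grouped = acc_data.groupby(["local_segment"])["magnitude"]
+
+                acc_features = pd.DataFrame()
+                if "feature1" in requested_features:
+                    acc_features["feature1"] = grouped.max()
+                if "feature2" in requested_features:
+                    acc_features["feature2"] = grouped.min()
+                if "feature3" in requested_features:
+                    acc_features["feature3"] = grouped.mean()
+                acc_features = acc_features.reset_index()
+
+        return acc_features
+    ```
+

??? 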
example "`PHONE_ACCELEROMETER` Provider Example" - For your reference, this a short example of our own provider (`RAPIDS`) for `PHONE_ACCELEROMETER` that computes five acceleration features + For your reference, this our own provider (`RAPIDS`) for `PHONE_ACCELEROMETER` that computes five acceleration features ```python - def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): - acc_data = pd.read_csv(sensor_data_files["sensor_data"]) - requested_features = provider["FEATURES"] - # name of the features this function can compute - base_features_names = ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] - # the subset of requested features this function can compute - features_to_compute = list(set(requested_features) & set(base_features_names)) + --8<---- "src/features/phone_accelerometer/rapids/main.py" - acc_features = pd.DataFrame(columns=["local_segment"] + features_to_compute) - if not acc_data.empty: - acc_data = filter_data_by_segment(acc_data, time_segment) - - if not acc_data.empty: - acc_features = pd.DataFrame() - # get magnitude related features: magnitude = sqrt(x^2+y^2+z^2) - magnitude = acc_data.apply(lambda row: np.sqrt(row["double_values_0"] ** 2 + row["double_values_1"] ** 2 + row["double_values_2"] ** 2), axis=1) - acc_data = acc_data.assign(magnitude = magnitude.values) - - if "maxmagnitude" in features_to_compute: - acc_features["maxmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].max() - if "minmagnitude" in features_to_compute: - acc_features["minmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].min() - if "avgmagnitude" in features_to_compute: - acc_features["avgmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].mean() - if "medianmagnitude" in features_to_compute: - acc_features["medianmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].median() - if "stdmagnitude" in features_to_compute: - acc_features["stdmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].std() - - acc_features = acc_features.reset_index() - - return acc_features ``` ## New Features for Non-Existing Sensors diff --git a/docs/features/feature-introduction.md b/docs/features/feature-introduction.md index 3ee36fd7..b22f23f1 100644 --- a/docs/features/feature-introduction.md +++ b/docs/features/feature-introduction.md @@ -6,6 +6,7 @@ Every device sensor has a corresponding config section in `config.yaml`, these s - We recommend reading this page if you are using RAPIDS for the first time - All computed sensor features are stored under `/data/processed/features` on files per sensor, per participant and per study (all participants). - Every time you change any sensor parameters, provider parameters or provider features, all the necessary files will be updated as soon as you execute RAPIDS. + - In short, to extract features offered by a provider, you need to set its `[COMPUTE]` flag to `TRUE`, configure any of its parameters, and [execute](../../setup/execution) RAPIDS. !!! example "Config section example for `PHONE_ACCELEROMETER`" diff --git a/docs/index.md b/docs/index.md index e47692a7..90ef6d56 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,9 +2,9 @@ Reproducible Analysis Pipeline for Data Streams (RAPIDS) allows you to process smartphone and wearable data to [extract](features/feature-introduction.md) and [create](features/add-new-features.md) **behavioral features** (a.k.a. 
digital biomarkers), [visualize](visualizations/data-quality-visualizations.md) mobile sensor data, and [structure](workflow-examples/analysis.md) your analysis into reproducible workflows. -RAPIDS is open source, documented, modular, tested, and reproducible. At the moment, we support [data streams](../../datastreams/data-streams-introduction) logged by smartphones, Fitbit wearables, and, in collaboration with the [DBDP](https://dbdp.org/), Empatica wearables (but you can [add your own]((../../datastreams/add-new-data-streams)) too). +RAPIDS is open source, documented, modular, tested, and reproducible. At the moment, we support [data streams](../../datastreams/data-streams-introduction) logged by smartphones, Fitbit wearables, and, in collaboration with the [DBDP](https://dbdp.org/), Empatica wearables (but you can [add your own](../../datastreams/add-new-data-streams) too). -**If you want to know more head over to [Where do I start?](../setup/where-do-i-start/)** +**If you want to know more head over to [Overview](../setup/overview/)** !!! tip :material-slack: Questions or feedback can be posted on the \#rapids channel in AWARE Framework\'s [slack](http://awareframework.com:3000/). @@ -13,10 +13,10 @@ RAPIDS is open source, documented, modular, tested, and reproducible. At the mom :fontawesome-solid-tasks: Join our discussions on our algorithms and assumptions for feature [processing](https://github.com/carissalow/rapids/discussions). - :fontawesome-solid-play: Ready to start? Go to [Installation](setup/installation/), then to [Configuration](setup/configuration/), and then to [Execution](setup/execution/) - :fontawesome-solid-sync-alt: Are you upgrading from RAPIDS `0.4.x` or older? Follow this [guide](migrating-from-old-versions) + :fontawesome-solid-play: Ready? Go to [Overview](../setup/overview/). + ## What are the benefits of using RAPIDS? 1. **Consistent analysis**. Every participant sensor dataset is analyzed in the same way and isolated from each other. diff --git a/docs/migrating-from-old-versions.md b/docs/migrating-from-old-versions.md index ba203409..f5fbf873 100644 --- a/docs/migrating-from-old-versions.md +++ b/docs/migrating-from-old-versions.md @@ -4,8 +4,8 @@ There are four actions that you need to take if you were using RAPIDS `0.4.3` or older ([before Feb 9th, 2021](https://github.com/carissalow/rapids/releases/tag/v0.4.3)): -??? check "Check the new Where do I start? page" - Check the new [Where do I start?](../setup/where-do-i-start/) page. Hopefully, it is a better overview of RAPIDS and provides answers to Frequently Asked Questions. +??? check "Check the new Overview page" + Check the new [Overview](../setup/overview/) page. Hopefully, it is a better overview of RAPIDS and provides answers to Frequently Asked Questions. ??? check "Deploy RAPIDS in a new folder" diff --git a/docs/setup/where-do-i-start.md b/docs/setup/overview.md similarity index 90% rename from docs/setup/where-do-i-start.md rename to docs/setup/overview.md index d366c2fc..7feb1888 100644 --- a/docs/setup/where-do-i-start.md +++ b/docs/setup/overview.md @@ -1,4 +1,4 @@ -# Where do I start? +# Overview Let's review some key concepts we use throughout these docs: @@ -18,19 +18,18 @@ Let's review some key concepts we use throughout these docs: |credentials.yaml| A YAML file where you can define credential groups (user, password, host, etc.) 
if your data stream needs to connect to a database or Web API|
 |Participant file| A YAML file that links one or more smartphone or wearable devices that a single participant used. RAPIDS needs one file per participant. |
 
-RAPIDS functionality includes:
+You can do one or more of these things with RAPIDS:
 
-- [Extract behavioral features](../../features/feature-introduction/) from smartphone, Fitbit, and Empatica's [supported data streams](../../datastreams/data-streams-introduction/)
-- [Add your own behavioral features](../../features/add-new-features/) (we can include them in RAPIDS if you want to share them with the community)
-- [Add support for new data streams](../../datastreams/add-new-data-streams/) if yours cannot be processed by RAPIDS yet
-- Create visualizations for [data quality control](../../visualizations/data-quality-visualizations/) and [feature inspection](../../visualizations/feature-visualizations/)
-- [Extending RAPIDS to organize your analysis](../../workflow-examples/analysis/) and publish a code repository along with your code
+1. [Extract behavioral features](../../features/feature-introduction/) from smartphone, Fitbit, and Empatica's [supported data streams](../../datastreams/data-streams-introduction/)
+1. [Add your own behavioral features](../../features/add-new-features/) (we can include them in RAPIDS if you want to share them with the community)
+1. [Add support for new data streams](../../datastreams/add-new-data-streams/) if yours cannot be processed by RAPIDS yet
+1. Create visualizations for [data quality control](../../visualizations/data-quality-visualizations/) and [feature inspection](../../visualizations/feature-visualizations/)
+1. [Extend RAPIDS to organize your analysis](../../workflow-examples/analysis/) and publish a code repository along with your code
 
+**To do any of the above, you will have to [Install](../installation/), [Configure](../configuration/), and learn how to [Execute](../execution/) RAPIDS.**
 
 !!! hint
-    - If you want to use RAPIDS for any of the above, you will have to [Install](../installation/), [Configure](../configuration/), and learn how to [Execute](../execution/) it.
-
-    - We also recommend you follow the [Minimal Example](../../workflow-examples/minimal/) tutorial to get familiar with RAPIDS
+    - We recommend you follow the [Minimal Example](../../workflow-examples/minimal/) tutorial to get familiar with RAPIDS
 
     - [Email us](../../team), create a [Github issue](https://github.com/carissalow/rapids/issues) or text us in [Slack](http://awareframework.com:3000/) if you have any questions
 
diff --git a/docs/workflow-examples/minimal.md b/docs/workflow-examples/minimal.md
index 84e3b36a..2b7e1a13 100644
--- a/docs/workflow-examples/minimal.md
+++ b/docs/workflow-examples/minimal.md
@@ -54,7 +54,9 @@ This is a quick guide for creating and running a simple pipeline to extract miss
 
 5. **Modify your [device data stream configuration](../../setup/configuration#data-stream-configuration)**
 
-    Set `[PHONE_DATA_STREAMS][USE]` to `aware_csv`.
+    1. Set `[PHONE_DATA_STREAMS][USE]` to `aware_csv`.
+
+    2. We will use the default value for `[PHONE_DATA_STREAMS][aware_csv][FOLDER]` since we already stored the test calls CSV file there.
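+
+    After this step, the relevant part of your `config.yaml` should look roughly like this (a sketch; the `FOLDER` path shown is only illustrative, keep whatever default value your file already has):
+
+    ```yaml
+    PHONE_DATA_STREAMS:
+      USE: aware_csv
+      aware_csv:
+        FOLDER: data/external/aware_csv # hypothetical default, leave your existing value as is
+    ```
 
6. 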
**Select what [sensors and features](../../setup/configuration#sensor-and-features-to-process) you want to process.** @@ -77,7 +79,7 @@ This is a quick guide for creating and running a simple pipeline to extract miss TIME_SEGMENTS: &time_segments TYPE: PERIODIC - FILE: "data/external/timesegments_periodic.csv" # make sure the three lines specified above are in the file + FILE: "data/external/timesegments_periodic.csv" INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE PHONE_DATA_STREAMS: diff --git a/mkdocs.yml b/mkdocs.yml index 44770866..55b29a8e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -73,7 +73,7 @@ extra_css: nav: - Home: 'index.md' - Setup: - - Where do I start?: setup/where-do-i-start.md + - Overview: setup/overview.md - Minimal Example: workflow-examples/minimal.md - Installation: 'setup/installation.md' - Configuration: setup/configuration.md