From 91f4a24f6535e578541cc1889b937a79bac3ba65 Mon Sep 17 00:00:00 2001 From: JulioV Date: Wed, 2 Jun 2021 12:10:31 -0400 Subject: [PATCH] Add aware_mysql_split phone data stream --- config.yaml | 9 +- docs/datastreams/add-new-data-streams.md | 2 + docs/datastreams/aware-mysql-split.md | 15 + docs/datastreams/data-streams-introduction.md | 1 + docs/setup/configuration.md | 20 +- .../streams/aware_mysql_split/container.R | 78 ++++ .../streams/aware_mysql_split/format.yaml | 337 ++++++++++++++++++ tools/config.schema.yaml | 23 ++ 8 files changed, 480 insertions(+), 5 deletions(-) create mode 100644 docs/datastreams/aware-mysql-split.md create mode 100644 src/data/streams/aware_mysql_split/container.R create mode 100644 src/data/streams/aware_mysql_split/format.yaml diff --git a/config.yaml b/config.yaml index eae81b29..cb77e56b 100644 --- a/config.yaml +++ b/config.yaml @@ -46,13 +46,16 @@ PHONE_DATA_STREAMS: USE: aware_mysql # AVAILABLE: - aware_mysql: + aware_mysql: # one table per sensor with all participants' data DATABASE_GROUP: MY_GROUP - aware_csv: + aware_mysql_split: # one table per sensor per participant + DATABASE_GROUP: MY_GROUP + + aware_csv: # one CSV file per sensor with all participants' data FOLDER: data/external/aware_csv - aware_influxdb: + aware_influxdb: # one table per sensor with all participants' data DATABASE_GROUP: MY_GROUP # Sensors ------ diff --git a/docs/datastreams/add-new-data-streams.md b/docs/datastreams/add-new-data-streams.md index fa49d0eb..036123b5 100644 --- a/docs/datastreams/add-new-data-streams.md +++ b/docs/datastreams/add-new-data-streams.md @@ -31,6 +31,8 @@ PHONE_DATA_STREAMS: DATABASE_GROUP: MY_GROUP # users define this group (user, password, host, etc.) in credentials.yaml ``` +Secondly, update `tools/config.schema.yaml` including `[*_DATA_STREAMS][properties][USE][enum]` and `[*_DATA_STREAMS][required]`. This is needed to make sure users do not use invalid values in your data stream's `config.yaml` entry by mistake. 
Take the other streams' entries as examples or check this [guide](../developers/validation-schema-config.md). + Then implement one or both of the following functions: === "pull_data" diff --git a/docs/datastreams/aware-mysql-split.md b/docs/datastreams/aware-mysql-split.md new file mode 100644 index 00000000..a04cb640 --- /dev/null +++ b/docs/datastreams/aware-mysql-split.md @@ -0,0 +1,15 @@ +# `aware_mysql_split` + +This [data stream](../../datastreams/data-streams-introduction) handles iOS and Android sensor data collected with the [AWARE Framework](https://awareframework.com/) and stored in a MySQL database. This stream is similar to `aware_mysql` except for the way data is stored in the database tables as explained below. + +## Container +A MySQL database with a table per sensor **per participant**. RAPIDS assumes such tables' names follow the format `deviceid_sensorname` (for example `a748ee1a-1d0b-4ae9-9074-279a2b6ba524_accelerometer`); if this is not the case, you can modify the SQL query in this stream's `container.R` script. RAPIDS also assumes that an empty table exists for those participants that don’t have data for a specific sensor. 
+ +The script to connect and download data from this container is at: +```bash +src/data/streams/aware_mysql_split/container.R +``` + +## Format + +--8<---- "docs/snippets/aware_format.md" diff --git a/docs/datastreams/data-streams-introduction.md b/docs/datastreams/data-streams-introduction.md index 10fb84a2..d18c8ae4 100644 --- a/docs/datastreams/data-streams-introduction.md +++ b/docs/datastreams/data-streams-introduction.md @@ -16,6 +16,7 @@ For reference, these are the data streams we currently support: | Data Stream | Device | Format | Container | Docs |--|--|--|--|--| | `aware_mysql`| Phone | AWARE app | MySQL | [link](../aware-mysql) +| `aware_mysql_split`| Phone | AWARE app | MySQL | [link](../aware-mysql-split) | `aware_csv`| Phone | AWARE app | CSV files | [link](../aware-csv) | `aware_influxdb` (beta)| Phone | AWARE app | InfluxDB | [link](../aware-influxdb) | `fitbitjson_mysql`| Fitbit | JSON (per [Fitbit's API](https://dev.fitbit.com/build/reference/web-api/)) | MySQL | [link](../fitbitjson-mysql) diff --git a/docs/setup/configuration.md b/docs/setup/configuration.md index b1a69bbe..ba20b53a 100644 --- a/docs/setup/configuration.md +++ b/docs/setup/configuration.md @@ -475,13 +475,29 @@ Modify the following keys in your `config.yaml` depending on the [data stream](. | `[DATABASE_GROUP]` | A database credentials group. Read the instructions below to set it up | --8<---- "docs/snippets/database.md" - - === "aware_csv" + + === "aware_mysql_split" | Key | Description | |---------------------|----------------------------------------------------------------------------------------------------------------------------| + | `[DATABASE_GROUP]` | A database credentials group. 
Read the instructions below to set it up | + + --8<---- "docs/snippets/database.md" + + === "aware_csv" + + | Key                 | Description| + |---------------------|----------------------------------------------------------------------------------------------------------------------------| | `[FOLDER]` | Folder where you have to place a CSV file **per** phone sensor. Each file has to contain all the data from every participant you want to process. | + === "aware_influxdb" + + | Key | Description | + |---------------------|----------------------------------------------------------------------------------------------------------------------------| + | `[DATABASE_GROUP]` | A database credentials group. Read the instructions below to set it up | + + --8<---- "docs/snippets/database.md" + + diff --git a/src/data/streams/aware_mysql_split/container.R b/src/data/streams/aware_mysql_split/container.R new file mode 100644 index 00000000..0e75e2ff --- /dev/null +++ b/src/data/streams/aware_mysql_split/container.R @@ -0,0 +1,78 @@ +# if you need a new package, you should add it with renv::install(package) so your renv venv is updated +library(RMariaDB) +library(yaml) + +#' @description +#' Auxiliary function to parse the connection credentials from a specific group in ./credentials.yaml +#' You can reuse most of this function if you are connecting to a DB or Web API. +#' It's OK to delete this function if you don't need credentials, e.g., if you are pulling data from a CSV file. +#' @param group the yaml key containing the credentials to connect to a database +#' @return dbEngine a database engine (connection) ready to perform queries +get_db_engine <- function(group){ + # The working dir is always RAPIDS root folder, so your credentials file is always ./credentials.yaml + credentials <- read_yaml("./credentials.yaml") + if(!group %in% names(credentials)) + stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. 
The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][aware_mysql_split][DATABASE_GROUP] in config.yaml?")) + dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]], + username = credentials[[group]][["user"]], + password = credentials[[group]][["password"]], + host = credentials[[group]][["host"]], + port = credentials[[group]][["port"]]) + return(dbEngine) +} + +# This file gets executed for each PHONE_SENSOR of each participant +# If you are connecting to a database, its credentials are read from "./credentials.yaml" (see get_db_engine) +# If you are reading a CSV file instead of a DB table, the @param sensor_container will contain the file path as set in config.yaml +# You are not bound to databases or files, you can query a web API or whatever data source you need. + +#' @description +#' RAPIDS allows users to use the keyword "infer" (previously "multiple") to automatically infer the mobile Operating System a device was running. +#' If you have a way to infer the OS of a device ID, implement this function. For example, for AWARE data we use the "aware_device" table. +#' +#' If you don't have a way to infer the OS, call stop("Error Message") so other users know they can't use "infer" or the inference failed, +#' and they have to assign the OS manually in the participant file +#' +#' @param stream_parameters The [PHONE_DATA_STREAMS][aware_mysql_split] entry in config.yaml. If you need specific parameters add them there. 
+#' @param device A device ID string +#' @return The OS the device ran, "android" or "ios" + +infer_device_os <- function(stream_parameters, device){ + dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP) + # Close the connection when this function exits, even if the query below fails + on.exit(dbDisconnect(dbEngine), add = TRUE) + query <- paste0("SELECT device_id,brand FROM aware_device WHERE device_id = '", device, "'") + message(paste0("Executing the following query to infer phone OS: ", query)) + os <- dbGetQuery(dbEngine, query) + + if(nrow(os) == 0) + stop(paste("We cannot infer the OS of the following device id because it does not exist in the aware_device table:", device)) + + # A brand of "iPhone" maps to "ios"; any other brand is treated as "android" + return(os %>% mutate(os = ifelse(brand == "iPhone", "ios", "android")) %>% pull(os)) +} + +#' @description +#' Gets the sensor data for a specific device id from a database table, file or whatever source you want to query +#' +#' @param stream_parameters The [PHONE_DATA_STREAMS][aware_mysql_split] entry in config.yaml. If you need specific parameters add them there. +#' @param device A device ID string +#' @param sensor Sensor identifier; it is not used by this stream's query +#' @param sensor_container suffix of the per-participant table name; the table queried is `<device>_<sensor_container>`. 
This is the PHONE_SENSOR[CONTAINER] key in config.yaml +#' @param columns the columns needed from this sensor; must include a DEVICE_ID element, which is used in the WHERE clause (we recommend to only return these columns instead of every column in sensor_container) +#' @return A dataframe with the sensor data for device + +pull_data <- function(stream_parameters, device, sensor, sensor_container, columns){ + dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP) + on.exit(dbDisconnect(dbEngine), add = TRUE) + query <- paste0("SELECT ", paste(columns, collapse = ",")," FROM `", device, "_", sensor_container, "` WHERE ", columns$DEVICE_ID ," = '", device,"'") + # Letting the user know what we are doing + message(paste0("Executing the following query to download data: ", query)) + sensor_data <- dbGetQuery(dbEngine, query) + + if(nrow(sensor_data) == 0) + warning(paste0("The device '", device, "' did not have data in ", device, "_", sensor_container)) + + return(sensor_data) +} + diff --git a/src/data/streams/aware_mysql_split/format.yaml b/src/data/streams/aware_mysql_split/format.yaml new file mode 100644 index 00000000..bc866f83 --- /dev/null +++ b/src/data/streams/aware_mysql_split/format.yaml @@ -0,0 +1,337 @@ +PHONE_ACCELEROMETER: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_VALUES_0: double_values_0 + DOUBLE_VALUES_1: double_values_1 + DOUBLE_VALUES_2: double_values_2 + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_VALUES_0: double_values_0 + DOUBLE_VALUES_1: double_values_1 + DOUBLE_VALUES_2: double_values_2 + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_ACTIVITY_RECOGNITION: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + ACTIVITY_NAME: activity_name + ACTIVITY_TYPE: activity_type + CONFIDENCE: confidence + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r 
scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + ACTIVITY_NAME: FLAG_TO_MUTATE + ACTIVITY_TYPE: FLAG_TO_MUTATE + CONFIDENCE: FLAG_TO_MUTATE + MUTATION: + COLUMN_MAPPINGS: + ACTIVITIES: activities + CONFIDENCE: confidence + SCRIPTS: # List any python or r scripts that mutate your raw data + - "src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R" + +PHONE_APPLICATIONS_CRASHES: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + PACKAGE_NAME: package_name + APPLICATION_NAME: application_name + APPLICATION_VERSION: application_version + ERROR_SHORT: error_short + ERROR_LONG: error_long + ERROR_CONDITION: error_condition + IS_SYSTEM_APP: is_system_app + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_APPLICATIONS_FOREGROUND: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + PACKAGE_NAME: package_name + APPLICATION_NAME: application_name + IS_SYSTEM_APP: is_system_app + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_APPLICATIONS_NOTIFICATIONS: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + PACKAGE_NAME: package_name + APPLICATION_NAME: application_name + TEXT: text + SOUND: sound + VIBRATE: vibrate + DEFAULTS: defaults + FLAGS: flags + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_BATTERY: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + BATTERY_STATUS: battery_status + BATTERY_LEVEL: battery_level + BATTERY_SCALE: battery_scale + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + BATTERY_STATUS: FLAG_TO_MUTATE + BATTERY_LEVEL: battery_level + 
BATTERY_SCALE: battery_scale + MUTATION: + COLUMN_MAPPINGS: + BATTERY_STATUS: battery_status + SCRIPTS: + - "src/data/streams/mutations/phone/aware/battery_ios_unification.R" + +PHONE_BLUETOOTH: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + BT_ADDRESS: bt_address + BT_NAME: bt_name + BT_RSSI: bt_rssi + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + BT_ADDRESS: bt_address + BT_NAME: bt_name + BT_RSSI: bt_rssi + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_CALLS: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + CALL_TYPE: call_type + CALL_DURATION: call_duration + TRACE: trace + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + CALL_TYPE: FLAG_TO_MUTATE + CALL_DURATION: call_duration + TRACE: trace + MUTATION: + COLUMN_MAPPINGS: + CALL_TYPE: call_type + SCRIPTS: + - "src/data/streams/mutations/phone/aware/calls_ios_unification.R" + +PHONE_CONVERSATION: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_ENERGY: double_energy + INFERENCE: inference + DOUBLE_CONVO_START: double_convo_start + DOUBLE_CONVO_END: double_convo_end + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_ENERGY: double_energy + INFERENCE: inference + DOUBLE_CONVO_START: double_convo_start + DOUBLE_CONVO_END: double_convo_end + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - "src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R" + +PHONE_KEYBOARD: + ANDROID: + 
RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + PACKAGE_NAME: package_name + BEFORE_TEXT: before_text + CURRENT_TEXT: current_text + IS_PASSWORD: is_password + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_LIGHT: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_LIGHT_LUX: double_light_lux + ACCURACY: accuracy + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_LOCATIONS: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_LATITUDE: double_latitude + DOUBLE_LONGITUDE: double_longitude + DOUBLE_BEARING: double_bearing + DOUBLE_SPEED: double_speed + DOUBLE_ALTITUDE: double_altitude + PROVIDER: provider + ACCURACY: accuracy + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_LATITUDE: double_latitude + DOUBLE_LONGITUDE: double_longitude + DOUBLE_BEARING: double_bearing + DOUBLE_SPEED: double_speed + DOUBLE_ALTITUDE: double_altitude + PROVIDER: provider + ACCURACY: accuracy + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_LOG: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + LOG_MESSAGE: log_message + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + LOG_MESSAGE: log_message + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_MESSAGES: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + MESSAGE_TYPE: message_type + TRACE: trace + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate 
your raw data + +PHONE_SCREEN: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + SCREEN_STATUS: screen_status + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + SCREEN_STATUS: FLAG_TO_MUTATE + MUTATION: + COLUMN_MAPPINGS: + SCREEN_STATUS: screen_status + SCRIPTS: # List any python or r scripts that mutate your raw data + - "src/data/streams/mutations/phone/aware/screen_ios_unification.R" + +PHONE_WIFI_CONNECTED: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + MAC_ADDRESS: mac_address + SSID: ssid + BSSID: bssid + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + MAC_ADDRESS: mac_address + SSID: ssid + BSSID: bssid + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_WIFI_VISIBLE: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + SSID: ssid + BSSID: bssid + SECURITY: security + FREQUENCY: frequency + RSSI: rssi + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + SSID: ssid + BSSID: bssid + SECURITY: security + FREQUENCY: frequency + RSSI: rssi + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index 01e7ec02..61c69335 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -206,11 +206,32 @@ properties: PHONE_DATA_STREAMS: type: object + required: ["USE","aware_mysql", "aware_mysql_split", "aware_csv", "aware_influxdb"] properties: USE: type: string + enum: ["aware_mysql", "aware_mysql_split", 
"aware_csv", "aware_influxdb"] aware_mysql: type: object + required: ["DATABASE_GROUP"] + properties: + DATABASE_GROUP: + type: string + aware_mysql_split: + type: object + required: ["DATABASE_GROUP"] + properties: + DATABASE_GROUP: + type: string + aware_csv: + type: object + required: ["FOLDER"] + properties: + FOLDER: + type: string + aware_influxdb: + type: object + required: ["DATABASE_GROUP"] properties: DATABASE_GROUP: type: string @@ -812,9 +833,11 @@ properties: FITBIT_DATA_STREAMS: type: object + required: ["USE","fitbitjson_mysql", "fitbitparsed_mysql", "fitbitjson_csv", "fitbitparsed_csv"] properties: USE: type: string + enum: ["fitbitjson_mysql", "fitbitparsed_mysql", "fitbitjson_csv", "fitbitparsed_csv"] fitbitjson_mysql: type: object required: [DATABASE_GROUP, SLEEP_SUMMARY_LAST_NIGHT_END]