Merge remote-tracking branch 'origin/master'
commit
ed5314aa98
|
@ -1,4 +1,9 @@
|
||||||
# Change Log
|
# Change Log
|
||||||
|
## v1.8.0
|
||||||
|
- Add data stream for AWARE Micro server
|
||||||
|
- Fix the NA bug in PHONE_LOCATIONS BARNETT provider
|
||||||
|
- Fix the bug of data type for call_duration field
|
||||||
|
- Fix the index bug of heatmap_sensors_per_minute_per_time_segment
|
||||||
## v1.7.1
|
## v1.7.1
|
||||||
- Update docs for Git Flow section
|
- Update docs for Git Flow section
|
||||||
- Update RAPIDS paper information
|
- Update RAPIDS paper information
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
# `aware_micro_mysql`
|
||||||
|
|
||||||
|
This [data stream](../../datastreams/data-streams-introduction) handles iOS and Android sensor data collected with the [AWARE Framework's](https://awareframework.com/) [AWARE Micro](https://github.com/denzilferreira/aware-micro) server and stored in a MySQL database.
|
||||||
|
|
||||||
|
## Container
|
||||||
|
A MySQL database with a table per sensor, each containing the data for all participants. Sensor data is stored in a JSON field within each table called `data`.
|
||||||
|
|
||||||
|
The script to connect and download data from this container is at:
|
||||||
|
```bash
|
||||||
|
src/data/streams/aware_micro_mysql/container.R
|
||||||
|
```
|
||||||
|
|
||||||
|
## Format
|
||||||
|
|
||||||
|
--8<---- "docs/snippets/aware_format.md"
|
|
@ -16,6 +16,7 @@ For reference, these are the data streams we currently support:
|
||||||
| Data Stream | Device | Format | Container | Docs
|
| Data Stream | Device | Format | Container | Docs
|
||||||
|--|--|--|--|--|
|
|--|--|--|--|--|
|
||||||
| `aware_mysql`| Phone | AWARE app | MySQL | [link](../aware-mysql)
|
| `aware_mysql`| Phone | AWARE app | MySQL | [link](../aware-mysql)
|
||||||
|
| `aware_micro_mysql`| Phone | AWARE Micro server | MySQL | [link](../aware-micro-mysql)
|
||||||
| `aware_csv`| Phone | AWARE app | CSV files | [link](../aware-csv)
|
| `aware_csv`| Phone | AWARE app | CSV files | [link](../aware-csv)
|
||||||
| `aware_influxdb` (beta)| Phone | AWARE app | InfluxDB | [link](../aware-influxdb)
|
| `aware_influxdb` (beta)| Phone | AWARE app | InfluxDB | [link](../aware-influxdb)
|
||||||
| `fitbitjson_mysql`| Fitbit | JSON (per [Fitbit's API](https://dev.fitbit.com/build/reference/web-api/)) | MySQL | [link](../fitbitjson-mysql)
|
| `fitbitjson_mysql`| Fitbit | JSON (per [Fitbit's API](https://dev.fitbit.com/build/reference/web-api/)) | MySQL | [link](../fitbitjson-mysql)
|
||||||
|
|
|
@ -85,6 +85,7 @@ nav:
|
||||||
- Introduction: datastreams/data-streams-introduction.md
|
- Introduction: datastreams/data-streams-introduction.md
|
||||||
- Phone:
|
- Phone:
|
||||||
- aware_mysql: datastreams/aware-mysql.md
|
- aware_mysql: datastreams/aware-mysql.md
|
||||||
|
- aware_micro_mysql: datastreams/aware-micro-mysql.md
|
||||||
- aware_csv: datastreams/aware-csv.md
|
- aware_csv: datastreams/aware-csv.md
|
||||||
- aware_influxdb (beta): datastreams/aware-influxdb.md
|
- aware_influxdb (beta): datastreams/aware-influxdb.md
|
||||||
- Mandatory Phone Format: datastreams/mandatory-phone-format.md
|
- Mandatory Phone Format: datastreams/mandatory-phone-format.md
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
# if you need a new package, you should add it with renv::install(package) so your renv venv is updated
|
||||||
|
library(RMariaDB)
|
||||||
|
library(yaml)
|
||||||
|
|
||||||
|
#' @description
#' Auxiliary function to parse the connection credentials from a specific group in ./credentials.yaml
#' You can reuse most of this function if you are connecting to a DB or Web API.
#' It's OK to delete this function if you don't need credentials, e.g., you are pulling data from a CSV file.
#' @param group the yaml key containing the credentials to connect to a database
#' @return dbEngine a database engine (connection) ready to perform queries. The connection is returned open; callers close it with dbDisconnect()
get_db_engine <- function(group){
  # The working dir is always RAPIDS root folder, so your credentials file is always ./credentials.yaml
  credentials <- read_yaml("./credentials.yaml")
  # Fail early and list the groups that do exist so a misspelled group name is easy to spot
  if(!group %in% names(credentials))
    stop(paste("The credentials group",group, "does not exist in ./credentials.yaml. The only groups that exist in that file are:", paste(names(credentials), collapse = ","), ". Did you forget to set the group in [PHONE_DATA_STREAMS][aware_micro_mysql][DATABASE_GROUP] in config.yaml?"))
  # The group must provide the keys: database, user, password, host and port
  dbEngine <- dbConnect(MariaDB(), db = credentials[[group]][["database"]],
                                    username = credentials[[group]][["user"]],
                                    password = credentials[[group]][["password"]],
                                    host = credentials[[group]][["host"]],
                                    port = credentials[[group]][["port"]])
  return(dbEngine)
}
|
||||||
|
|
||||||
|
# This file gets executed for each PHONE_SENSOR of each participant
|
||||||
|
# If you are connecting to a database, its credentials are read from "./credentials.yaml" (see get_db_engine)
|
||||||
|
# If you are reading a CSV file instead of a DB table, the @param sensor_container will contain the file path as set in config.yaml
|
||||||
|
# You are not bound to databases or files, you can query a web API or whatever data source you need.
|
||||||
|
|
||||||
|
#' @description
#' RAPIDS allows users to use the keyword "infer" (previously "multiple") to automatically infer the mobile operating system a device was running.
#' If you have a way to infer the OS of a device ID, implement this function. For example, for AWARE data we use the "aware_device" table.
#'
#' If you don't have a way to infer the OS, call stop("Error Message") so other users know they can't use "infer" or the inference failed,
#' and they have to assign the OS manually in the participant file
#'
#' @param stream_parameters The PHONE_STREAM_PARAMETERS key in config.yaml. If you need specific parameters add them there.
#' @param device A device ID string
#' @return The OS the device ran, "android" or "ios" (one value per matching row in aware_device)

infer_device_os <- function(stream_parameters, device){
  dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
  # Close the connection on every exit path, including when the query below fails
  on.exit(dbDisconnect(dbEngine), add = TRUE)

  query <- paste0("SELECT device_id,brand FROM aware_device WHERE device_id = '", device, "'")
  message(paste0("Executing the following query to infer phone OS: ", query))
  os <- dbGetQuery(dbEngine, query)

  if(nrow(os) == 0)
    stop(paste("We cannot infer the OS of the following device id because it does not exist in the aware_device table:", device))

  # Any brand other than "iPhone" is reported as Android.
  # Base R ifelse is used so this script does not depend on dplyr being attached.
  return(ifelse(os$brand == "iPhone", "ios", "android"))
}
|
||||||
|
|
||||||
|
#' @description
#' Gets the sensor data for a specific device id from a database table, file or whatever source you want to query
#'
#' @param stream_parameters The PHONE_STREAM_PARAMETERS key in config.yaml. If you need specific parameters add them there.
#' @param device A device ID string
#' @param sensor The sensor being pulled; part of the interface, not used by this container
#' @param sensor_container database table or file containing the sensor data for all participants. This is the PHONE_SENSOR[CONTAINER] key in config.yaml
#' @param columns the columns needed from this sensor (we recommend to only return these columns instead of every column in sensor_container)
#' @return A dataframe with the sensor data for device

pull_data <- function(stream_parameters, device, sensor, sensor_container, columns){
  dbEngine <- get_db_engine(stream_parameters$DATABASE_GROUP)
  # Close the connection on every exit path, including when the query below fails
  on.exit(dbDisconnect(dbEngine), add = TRUE)

  # Each requested column is extracted from the JSON field `data` and aliased with its own name
  select_items <- vapply(columns,
                         function(column) paste0("data->>'$.", column, "' ", column),
                         character(1),
                         USE.NAMES = FALSE)

  query <- paste0("SELECT ", paste(select_items, collapse = ",")," FROM ", sensor_container, " WHERE ", columns$DEVICE_ID ," = '", device,"'")

  # Letting the user know what we are doing
  message(paste0("Executing the following query to download data: ", query))
  sensor_data <- dbGetQuery(dbEngine, query)

  # An empty result is not an error; warn so the user can check the device id and table
  if(nrow(sensor_data) == 0)
    warning(paste0("The device '", device, "' did not have data in ", sensor_container))

  return(sensor_data)
}
|
||||||
|
|
|
@ -0,0 +1,337 @@
|
||||||
|
PHONE_ACCELEROMETER:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
DOUBLE_VALUES_0: double_values_0
|
||||||
|
DOUBLE_VALUES_1: double_values_1
|
||||||
|
DOUBLE_VALUES_2: double_values_2
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
DOUBLE_VALUES_0: double_values_0
|
||||||
|
DOUBLE_VALUES_1: double_values_1
|
||||||
|
DOUBLE_VALUES_2: double_values_2
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_ACTIVITY_RECOGNITION:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
ACTIVITY_NAME: activity_name
|
||||||
|
ACTIVITY_TYPE: activity_type
|
||||||
|
CONFIDENCE: confidence
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
ACTIVITY_NAME: FLAG_TO_MUTATE
|
||||||
|
ACTIVITY_TYPE: FLAG_TO_MUTATE
|
||||||
|
CONFIDENCE: FLAG_TO_MUTATE
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
ACTIVITIES: activities
|
||||||
|
CONFIDENCE: confidence
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
- "src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R"
|
||||||
|
|
||||||
|
PHONE_APPLICATIONS_CRASHES:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
PACKAGE_NAME: package_name
|
||||||
|
APPLICATION_NAME: application_name
|
||||||
|
APPLICATION_VERSION: application_version
|
||||||
|
ERROR_SHORT: error_short
|
||||||
|
ERROR_LONG: error_long
|
||||||
|
ERROR_CONDITION: error_condition
|
||||||
|
IS_SYSTEM_APP: is_system_app
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_APPLICATIONS_FOREGROUND:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
PACKAGE_NAME: package_name
|
||||||
|
APPLICATION_NAME: application_name
|
||||||
|
IS_SYSTEM_APP: is_system_app
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
PACKAGE_NAME: package_name
|
||||||
|
APPLICATION_NAME: application_name
|
||||||
|
TEXT: text
|
||||||
|
SOUND: sound
|
||||||
|
VIBRATE: vibrate
|
||||||
|
DEFAULTS: defaults
|
||||||
|
FLAGS: flags
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_BATTERY:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
BATTERY_STATUS: battery_status
|
||||||
|
BATTERY_LEVEL: battery_level
|
||||||
|
BATTERY_SCALE: battery_scale
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
BATTERY_STATUS: FLAG_TO_MUTATE
|
||||||
|
BATTERY_LEVEL: battery_level
|
||||||
|
BATTERY_SCALE: battery_scale
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
BATTERY_STATUS: battery_status
|
||||||
|
SCRIPTS:
|
||||||
|
- "src/data/streams/mutations/phone/aware/battery_ios_unification.R"
|
||||||
|
|
||||||
|
PHONE_BLUETOOTH:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
BT_ADDRESS: bt_address
|
||||||
|
BT_NAME: bt_name
|
||||||
|
BT_RSSI: bt_rssi
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
BT_ADDRESS: bt_address
|
||||||
|
BT_NAME: bt_name
|
||||||
|
BT_RSSI: bt_rssi
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_CALLS:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
CALL_TYPE: call_type
|
||||||
|
CALL_DURATION: call_duration
|
||||||
|
TRACE: trace
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
CALL_TYPE: FLAG_TO_MUTATE
|
||||||
|
CALL_DURATION: call_duration
|
||||||
|
TRACE: trace
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
CALL_TYPE: call_type
|
||||||
|
SCRIPTS:
|
||||||
|
- "src/data/streams/mutations/phone/aware/calls_ios_unification.R"
|
||||||
|
|
||||||
|
PHONE_CONVERSATION:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
DOUBLE_ENERGY: double_energy
|
||||||
|
INFERENCE: inference
|
||||||
|
DOUBLE_CONVO_START: double_convo_start
|
||||||
|
DOUBLE_CONVO_END: double_convo_end
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
DOUBLE_ENERGY: double_energy
|
||||||
|
INFERENCE: inference
|
||||||
|
DOUBLE_CONVO_START: double_convo_start
|
||||||
|
DOUBLE_CONVO_END: double_convo_end
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
- "src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R"
|
||||||
|
|
||||||
|
PHONE_KEYBOARD:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
PACKAGE_NAME: package_name
|
||||||
|
BEFORE_TEXT: before_text
|
||||||
|
CURRENT_TEXT: current_text
|
||||||
|
IS_PASSWORD: is_password
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_LIGHT:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
DOUBLE_LIGHT_LUX: double_light_lux
|
||||||
|
ACCURACY: accuracy
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_LOCATIONS:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
DOUBLE_LATITUDE: double_latitude
|
||||||
|
DOUBLE_LONGITUDE: double_longitude
|
||||||
|
DOUBLE_BEARING: double_bearing
|
||||||
|
DOUBLE_SPEED: double_speed
|
||||||
|
DOUBLE_ALTITUDE: double_altitude
|
||||||
|
PROVIDER: provider
|
||||||
|
ACCURACY: accuracy
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
DOUBLE_LATITUDE: double_latitude
|
||||||
|
DOUBLE_LONGITUDE: double_longitude
|
||||||
|
DOUBLE_BEARING: double_bearing
|
||||||
|
DOUBLE_SPEED: double_speed
|
||||||
|
DOUBLE_ALTITUDE: double_altitude
|
||||||
|
PROVIDER: provider
|
||||||
|
ACCURACY: accuracy
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_LOG:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
LOG_MESSAGE: log_message
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
LOG_MESSAGE: log_message
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_MESSAGES:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
MESSAGE_TYPE: message_type
|
||||||
|
TRACE: trace
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_SCREEN:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
SCREEN_STATUS: screen_status
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
SCREEN_STATUS: FLAG_TO_MUTATE
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCREEN_STATUS: screen_status
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
- "src/data/streams/mutations/phone/aware/screen_ios_unification.R"
|
||||||
|
|
||||||
|
PHONE_WIFI_CONNECTED:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
MAC_ADDRESS: mac_address
|
||||||
|
SSID: ssid
|
||||||
|
BSSID: bssid
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
MAC_ADDRESS: mac_address
|
||||||
|
SSID: ssid
|
||||||
|
BSSID: bssid
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_WIFI_VISIBLE:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
SSID: ssid
|
||||||
|
BSSID: bssid
|
||||||
|
SECURITY: security
|
||||||
|
FREQUENCY: frequency
|
||||||
|
RSSI: rssi
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
SSID: ssid
|
||||||
|
BSSID: bssid
|
||||||
|
SECURITY: security
|
||||||
|
FREQUENCY: frequency
|
||||||
|
RSSI: rssi
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
|
@ -39,7 +39,7 @@ unify_ios_calls <- function(ios_calls){
|
||||||
assigned_segments = first(assigned_segments))
|
assigned_segments = first(assigned_segments))
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ios_calls <- ios_calls %>% summarise(call_type_sequence = paste(call_type, collapse = ","), call_duration = sum(call_duration), timestamp = first(timestamp), device_id = first(device_id))
|
ios_calls <- ios_calls %>% summarise(call_type_sequence = paste(call_type, collapse = ","), call_duration = sum(as.numeric(call_duration)), timestamp = first(timestamp), device_id = first(device_id))
|
||||||
}
|
}
|
||||||
ios_calls <- ios_calls %>% mutate(call_type = case_when(
|
ios_calls <- ios_calls %>% mutate(call_type = case_when(
|
||||||
call_type_sequence == "1,2,4" | call_type_sequence == "2,1,4" ~ 1, # incoming
|
call_type_sequence == "1,2,4" | call_type_sequence == "2,1,4" ~ 1, # incoming
|
||||||
|
|
|
@ -26,7 +26,9 @@ barnett_daily_features <- function(snakemake){
|
||||||
location <- location %>%
|
location <- location %>%
|
||||||
mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*")))
|
mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*")))
|
||||||
|
|
||||||
if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){
|
does_not_span = nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)
|
||||||
|
|
||||||
|
if(is.na(does_not_span) || does_not_span){
|
||||||
warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:",
|
warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:",
|
||||||
"\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)),
|
"\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)),
|
||||||
"\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2)
|
"\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2)
|
||||||
|
|
|
@ -24,12 +24,12 @@ def colors2colorscale(colors):
|
||||||
def getDataForPlot(phone_data_yield_per_segment):
|
def getDataForPlot(phone_data_yield_per_segment):
|
||||||
# calculate the length (in minute) of per segment instance
|
# calculate the length (in minute) of per segment instance
|
||||||
phone_data_yield_per_segment["length"] = phone_data_yield_per_segment["timestamps_segment"].str.split(",").apply(lambda x: int((int(x[1])-int(x[0])) / (1000 * 60)))
|
phone_data_yield_per_segment["length"] = phone_data_yield_per_segment["timestamps_segment"].str.split(",").apply(lambda x: int((int(x[1])-int(x[0])) / (1000 * 60)))
|
||||||
# calculate the number of sensors logged at least one row of data per minute.
|
|
||||||
phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(["local_segment", "length", "local_date", "local_hour", "local_minute"])[["sensor", "local_date_time"]].max().reset_index()
|
|
||||||
# extract local start datetime of the segment from "local_segment" column
|
# extract local start datetime of the segment from "local_segment" column
|
||||||
phone_data_yield_per_segment["local_segment_start_datetimes"] = pd.to_datetime(phone_data_yield_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
|
phone_data_yield_per_segment["local_segment_start_datetimes"] = pd.to_datetime(phone_data_yield_per_segment["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0]))
|
||||||
# calculate the number of minutes after local start datetime of the segment
|
# calculate the number of minutes after local start datetime of the segment
|
||||||
phone_data_yield_per_segment["minutes_after_segment_start"] = ((phone_data_yield_per_segment["local_date_time"] - phone_data_yield_per_segment["local_segment_start_datetimes"]) / pd.Timedelta(minutes=1)).astype("int")
|
phone_data_yield_per_segment["minutes_after_segment_start"] = ((phone_data_yield_per_segment["local_date_time"] - phone_data_yield_per_segment["local_segment_start_datetimes"]) / pd.Timedelta(minutes=1)).astype("int")
|
||||||
|
# calculate the number of sensors logged at least one row of data per minute.
|
||||||
|
phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(["local_segment", "length", "local_segment_start_datetimes", "minutes_after_segment_start"])[["sensor"]].max().reset_index()
|
||||||
|
|
||||||
# impute missing rows with 0
|
# impute missing rows with 0
|
||||||
columns_for_full_index = phone_data_yield_per_segment[["local_segment_start_datetimes", "length"]].drop_duplicates(keep="first")
|
columns_for_full_index = phone_data_yield_per_segment[["local_segment_start_datetimes", "length"]].drop_duplicates(keep="first")
|
||||||
|
|
Loading…
Reference in New Issue