From 6e898beca5c32736b6496d8421ab271225fbe9e5 Mon Sep 17 00:00:00 2001 From: JulioV Date: Thu, 11 Mar 2021 19:32:11 -0500 Subject: [PATCH] Add aware_csv --- config.yaml | 3 +- docs/datastreams/aware-csv.md | 30 + docs/datastreams/aware-mysql.md | 608 +----------------- docs/datastreams/fitbitjson-csv.md | 9 + docs/datastreams/fitbitparsed-csv.md | 12 + docs/snippets/aware_format.md | 606 +++++++++++++++++ mkdocs.yml | 1 + src/data/datetime/assign_to_time_segment.R | 5 +- src/data/datetime/readable_datetime.R | 7 +- src/data/streams/aware_csv/container.R | 77 +++ src/data/streams/aware_csv/format.yaml | 315 +++++++++ src/data/streams/fitbitjson_csv/container.R | 4 +- src/data/streams/fitbitparsed_csv/container.R | 4 +- .../activity_recogniton_ios_unification.R | 3 +- src/data/streams/pull_phone_data.R | 2 +- src/data/streams/pull_wearable_data.R | 4 + 16 files changed, 1072 insertions(+), 618 deletions(-) create mode 100644 docs/datastreams/aware-csv.md create mode 100644 docs/snippets/aware_format.md create mode 100644 src/data/streams/aware_csv/container.R create mode 100644 src/data/streams/aware_csv/format.yaml diff --git a/config.yaml b/config.yaml index 7a84c63d..a7cd3117 100644 --- a/config.yaml +++ b/config.yaml @@ -52,7 +52,8 @@ PHONE_DATA_STREAMS: aware_mysql: DATABASE_GROUP: MY_GROUP - + aware_csv: + FOLDER: data/external/aware_csv # Sensors ------ # https://www.rapids.science/latest/features/phone-accelerometer/ diff --git a/docs/datastreams/aware-csv.md b/docs/datastreams/aware-csv.md new file mode 100644 index 00000000..5d78243b --- /dev/null +++ b/docs/datastreams/aware-csv.md @@ -0,0 +1,30 @@ +# `aware_csv` + +This [data stream](../../datastreams/data-streams-introduction) handles iOS and Android sensor data collected with the [AWARE Framework](https://awareframework.com/) and stored in CSV files. + +!!! warning + The CSV files have to use `,` as separator, `\` as escape character (do not escape `"` with `""`), and wrap any string columns with `"`. 
+ + ??? example "Example of a valid CSV file" + ```csv + "_id","timestamp","device_id","activities","confidence","stationary","walking","running","automotive","cycling","unknown","label" + 1,1587528000000,"13dbc8a3-dae3-4834-823a-4bc96a7d459d","[\"stationary\"]",2,1,0,0,0,0,0,"" + 2,1587528060000,"13dbc8a3-dae3-4834-823a-4bc96a7d459d","[\"stationary\"]",2,1,0,0,0,0,0,"supplement" + 3,1587528120000,"13dbc8a3-dae3-4834-823a-4bc96a7d459d","[\"stationary\"]",2,1,0,0,0,0,0,"supplement" + 4,1587528180000,"13dbc8a3-dae3-4834-823a-4bc96a7d459d","[\"stationary\"]",2,1,0,0,0,0,0,"supplement" + 5,1587528240000,"13dbc8a3-dae3-4834-823a-4bc96a7d459d","[\"stationary\"]",2,1,0,0,0,0,0,"supplement" + 6,1587528300000,"13dbc8a3-dae3-4834-823a-4bc96a7d459d","[\"stationary\"]",2,1,0,0,0,0,0,"supplement" + 7,1587528360000,"13dbc8a3-dae3-4834-823a-4bc96a7d459d","[\"stationary\"]",2,1,0,0,0,0,0,"supplement" + ``` + +## Container +A CSV file per sensor, each containing the data for all participants. + +The script to connect and download data from this container is at: +```bash +src/data/streams/aware_csv/container.R +``` + +## Format + +--8<---- "docs/snippets/aware_format.md" diff --git a/docs/datastreams/aware-mysql.md b/docs/datastreams/aware-mysql.md index a47ae97d..dfcd9954 100644 --- a/docs/datastreams/aware-mysql.md +++ b/docs/datastreams/aware-mysql.md @@ -1,4 +1,4 @@ -# `aware-mysql` +# `aware_mysql` This [data stream](../../datastreams/data-streams-introduction) handles iOS and Android sensor data collected with the [AWARE Framework](https://awareframework.com/) and stored in a MySQL database. @@ -11,609 +11,5 @@ src/data/streams/aware_mysql/container.R ``` ## Format -If you collected sensor data with the vanilla (original) AWARE mobile clients you shouldn't need to modify this format (described below). - -Remember that a format maps and transforms columns in your raw data stream to the [mandatory columns RAPIDS needs](../mandatory-phone-format). 
- -The yaml file that describes the format of this data stream is at: -```bash -src/data/streams/aware_mysql/format.yaml -``` - -Stream columns named `FLAG_TO_MUTATE` means they are extracted based on the `MUTATION` section. You can refer to [OS complex mapping](../../datastreams/add-new-data-streams/#os-complex-mapping) for detailed information. - -!!! hint - The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. - -??? info "PHONE_ACCELEROMETER" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |-----------------|-----------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | DOUBLE_VALUES_0 | double_values_0 | - | DOUBLE_VALUES_1 | double_values_1 | - | DOUBLE_VALUES_2 | double_values_2 | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - Same as ANDROID - -??? info "PHONE_ACTIVITY_RECOGNITION" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |-----------------|-----------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | ACTIVITY_NAME | activity_name | - | ACTIVITY_TYPE | activity_type | - | CONFIDENCE | confidence | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |-----------------|-----------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | ACTIVITY_NAME | FLAG_TO_MUTATE | - | ACTIVITY_TYPE | FLAG_TO_MUTATE | - | CONFIDENCE | FLAG_TO_MUTATE | - - **MUTATION** - - - **COLUMN_MAPPINGS** - - | Script column | Stream column | - |-----------------|-----------------| - | ACTIVITIES | activities | - | CONFIDENCE | confidence | - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R - ``` - - - !!! 
note - For RAPIDS columns of `ACTIVITY_NAME` and `ACTIVITY_TYPE`: - - - if stream's `activities` field is automotive, set `ACTIVITY_NAME` = in_vehicle and `ACTIVITY_TYPE` = 0 - - if stream's `activities` field is cycling, set `ACTIVITY_NAME` = on_bicycle and `ACTIVITY_TYPE` = 1 - - if stream's `activities` field is walking, set `ACTIVITY_NAME` = walking and `ACTIVITY_TYPE` = 7 - - if stream's `activities` field is running, set `ACTIVITY_NAME` = running and `ACTIVITY_TYPE` = 8 - - if stream's `activities` field is stationary, set `ACTIVITY_NAME` = still and `ACTIVITY_TYPE` = 3 - - if stream's `activities` field is unknown, set `ACTIVITY_NAME` = unknown and `ACTIVITY_TYPE` = 4 - - For RAPIDS `CONFIDENCE` column: - - - if stream's `confidence` field is 0, set `CONFIDENCE` = 0 - - if stream's `confidence` field is 1, set `CONFIDENCE` = 50 - - if stream's `confidence` field is 2, set `CONFIDENCE` = 100 - - -??? info "PHONE_APPLICATIONS_CRASHES" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |--------------------|--------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | PACKAGE_NAME | package_name | - | APPLICATION_NAME | application_name | - | APPLICATION_VERSION| application_version| - | ERROR_SHORT | error_short | - | ERROR_LONG | error_long | - | ERROR_CONDITION | error_condition | - | IS_SYSTEM_APP | is_system_app | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - This sensor is not supported by iOS devices. - - -??? 
info "PHONE_APPLICATIONS_FOREGROUND" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |--------------------|--------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | PACKAGE_NAME | package_name | - | APPLICATION_NAME | application_name | - | IS_SYSTEM_APP | is_system_app | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - This sensor is not supported by iOS devices. - -??? info "PHONE_APPLICATIONS_NOTIFICATIONS" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |--------------------|--------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | PACKAGE_NAME | package_name | - | APPLICATION_NAME | application_name | - | TEXT | text | - | SOUND | sound | - | VIBRATE | vibrate | - | DEFAULTS | defaults | - | FLAGS | flags | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - This sensor is not supported by iOS devices. - -??? 
info "PHONE_BATTERY" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | BATTERY_STATUS | battery_status | - | BATTERY_LEVEL | battery_level | - | BATTERY_SCALE | battery_scale | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS Client V1" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | BATTERY_STATUS | FLAG_TO_MUTATE | - | BATTERY_LEVEL | battery_level | - | BATTERY_SCALE | battery_scale | - - **MUTATION** - - - **COLUMN_MAPPINGS** - - | Script column | Stream column | - |----------------------|---------------------| - | BATTERY_STATUS | battery_status | - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/phone/aware/battery_ios_unification.R - ``` - - !!! note - For RAPIDS `BATTERY_STATUS` column: - - - if stream's `battery_status` field is 3, set `BATTERY_STATUS` = 5 (full status) - - if stream's `battery_status` field is 1, set `BATTERY_STATUS` = 3 (discharge) - - === "IOS Client V2" - - Same as ANDROID - - -??? info "PHONE_BLUETOOTH" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | BT_ADDRESS | bt_address | - | BT_NAME | bt_name | - | BT_RSSI | bt_rssi | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - This sensor is not supported by iOS devices. - - -??? 
info "PHONE_CALLS" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | CALL_TYPE | call_type | - | CALL_DURATION | call_duration | - | TRACE | trace | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | CALL_TYPE | FLAG_TO_MUTATE | - | CALL_DURATION | call_duration | - | TRACE | trace | - - **MUTATION** - - - **COLUMN_MAPPINGS** - - | Script column | Stream column | - |----------------------|---------------------| - | CALL_TYPE | call_type | - - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/phone/aware/calls_ios_unification.R - ``` - - !!! note - - We transform iOS call logs into Android's format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android's events: 1=incoming, 2=outgoing, 3=missed. 
- - We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): - - - Search for the disconnected (4) status as it is common to all calls - - Group all events that preceded every status 4 - - We convert every 1,2,4 (or 2,1,4) sequence to an incoming call - - We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call - - We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) - - We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) - - **Tested with an Android (OnePlus 7T) and an iPhone XR** - - |Call type | Android (duration) | iOS (duration) | New Rule| - |---------|----------|--------|------| - |Outgoing missed ended by me | 2 (0) | 3,4 (0,X) | 3,4 is converted to 2 with duration 0| - |Outgoing missed ended by them|2(0)|3,2,4 (0,X,X2)| 3,2,4 is converted to 2 with duration X2*| - |Incoming missed ended by me|NA**|1,4 (0,X)|1,4 is converted to 3 with duration 0| - |Incoming missed ended by them|3(0)|1,4 (0,X)|1,4 is converted to 3 with duration 0| - |Outgoing answered|2(X excluding dialing time)|3,2,4 (0,X,X2)|3,2,4 is converted to 2 with duration X2| - |Incoming answered|1(X excluding dialing time)|1,2,4 (0,X,X2)|1,2,4 is converted to 1 with duration X2| - - .* There is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). - - .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. - - -??? 
info "PHONE_CONVERSATION" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | DOUBLE_ENERGY | double_energy | - | INFERENCE | inference | - | DOUBLE_CONVO_START | double_convo_start | - | DOUBLE_CONVO_END | double_convo_end | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | DOUBLE_ENERGY | double_energy | - | INFERENCE | inference | - | DOUBLE_CONVO_START | FLAG_TO_MUTATE | - | DOUBLE_CONVO_END | FLAG_TO_MUTATE | - - **MUTATION** - - - **COLUMN_MAPPINGS** - - | Script column | Stream column | - |----------------------|---------------------| - | DOUBLE_CONVO_START | double_convo_start | - | DOUBLE_CONVO_END | double_convo_end | - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R - ``` - - !!! note - For RAPIDS columns of `DOUBLE_CONVO_START` and `DOUBLE_CONVO_END`: - - - if stream's `double_convo_start` field is smaller than 9999999999, it is in seconds instead of milliseconds. Set `DOUBLE_CONVO_START` = 1000 * `double_convo_start`. - - if stream's `double_convo_end` field is smaller than 9999999999, it is in seconds instead of milliseconds. Set `DOUBLE_CONVO_END` = 1000 * `double_convo_end`. - - -??? 
info "PHONE_KEYBOARD" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | PACKAGE_NAME | package_name | - | BEFORE_TEXT | before_text | - | CURRENT_TEXT | current_text | - | IS_PASSWORD | is_password | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - This sensor is not supported by iOS devices. - - -??? info "PHONE_LIGHT" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | DOUBLE_LIGHT_LUX | double_light_lux | - | ACCURACY | accuracy | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - This sensor is not supported by iOS devices. - - -??? info "PHONE_LOCATIONS" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | DOUBLE_LATITUDE | double_latitude | - | DOUBLE_LONGITUDE | double_longitude | - | DOUBLE_BEARING | double_bearing | - | DOUBLE_SPEED | double_speed | - | DOUBLE_ALTITUDE | double_altitude | - | PROVIDER | provider | - | ACCURACY | accuracy | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - Same as ANDROID - - -??? info "PHONE_LOG" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | LOG_MESSAGE | log_message | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - Same as ANDROID - - -??? 
info "PHONE_MESSAGES" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | MESSAGE_TYPE | message_type | - | TRACE | trace | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - This sensor is not supported by iOS devices. - - -??? info "PHONE_SCREEN" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | SCREEN_STATUS | screen_status | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | SCREEN_STATUS | FLAG_TO_MUTATE | - - **MUTATION** - - - **COLUMN_MAPPINGS** - - | Script column | Stream column | - |----------------------|---------------------| - | SCREEN_STATUS | screen_status | - - - **SCRIPTS** - - ```bash - src/data/streams/mutations/phone/aware/screen_ios_unification.R - ``` - - !!! note - For `SCREEN_STATUS` RAPIDS column: - - - if stream's `screen_status` field is 2 (lock episode), set `SCREEN_STATUS` = 0 (off episode). - - -??? info "PHONE_WIFI_CONNECTED" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | MAC_ADDRESS | mac_address | - | SSID | ssid | - | BSSID | bssid | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - Same as ANDROID - - -??? 
info "PHONE_WIFI_VISIBLE" - - === "ANDROID" - - **RAPIDS_COLUMN_MAPPINGS** - - | RAPIDS column | Stream column | - |----------------------|---------------------| - | TIMESTAMP | timestamp | - | DEVICE_ID | device_id | - | SSID | ssid | - | BSSID | bssid | - | SECURITY | security | - | FREQUENCY | frequency | - | RSSI | rssi | - - **MUTATION** - - - **COLUMN_MAPPINGS** (None) - - **SCRIPTS** (None) - - === "IOS" - - This sensor is not supported by iOS devices. +--8<---- "docs/snippets/aware_format.md" diff --git a/docs/datastreams/fitbitjson-csv.md b/docs/datastreams/fitbitjson-csv.md index 39aa597a..0c5422a3 100644 --- a/docs/datastreams/fitbitjson-csv.md +++ b/docs/datastreams/fitbitjson-csv.md @@ -1,6 +1,15 @@ # `fitbitjson_csv` This [data stream](../../datastreams/data-streams-introduction) handles Fitbit sensor data downloaded using the [Fitbit Web API](https://dev.fitbit.com/build/reference/web-api/) and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your sensor data in a CSV file, RAPIDS can process it. +!!! warning + The CSV files have to use `,` as separator, `\` as escape character (do not escape `"` with `""`), and wrap any string columns with `"`. + + ??? example "Example of a valid CSV file" + ```csv + "timestamp","device_id","label","fitbit_id","fitbit_data_type","fitbit_data" + 1587614400000,"a748ee1a-1d0b-4ae9-9074-279a2b6ba524","5S","5ZKN9B","steps","{\"activities-steps\":[{\"dateTime\":\"2020-04-23\",\"value\":\"7881\"}]}" + ``` + ## Container The container should be a CSV file per Fitbit sensor, each containing all participants' data. diff --git a/docs/datastreams/fitbitparsed-csv.md b/docs/datastreams/fitbitparsed-csv.md index 9e07b9ca..1319ad90 100644 --- a/docs/datastreams/fitbitparsed-csv.md +++ b/docs/datastreams/fitbitparsed-csv.md @@ -4,6 +4,18 @@ This [data stream](../../datastreams/data-streams-introduction) handles Fitbit s !!! 
info "What is the difference between JSON and plain data streams" Most people will only need `fitbitjson_*` because they downloaded and stored their data directly from Fitbit's API. However, if, for some reason, you don't have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. +!!! warning + The CSV files have to use `,` as separator, `\` as escape character (do not escape `"` with `""`), and wrap any string columns with `"`. + + ??? example "Example of a valid CSV file" + ```csv + "device_id","heartrate","heartrate_zone","local_date_time","timestamp" + "a748ee1a-1d0b-4ae9-9074-279a2b6ba524",69,"outofrange","2020-04-23 00:00:00",0 + "a748ee1a-1d0b-4ae9-9074-279a2b6ba524",69,"outofrange","2020-04-23 00:01:00",0 + "a748ee1a-1d0b-4ae9-9074-279a2b6ba524",67,"outofrange","2020-04-23 00:02:00",0 + "a748ee1a-1d0b-4ae9-9074-279a2b6ba524",69,"outofrange","2020-04-23 00:03:00",0 + ``` + ## Container The container should be a CSV file per sensor, each containing all participants' data. diff --git a/docs/snippets/aware_format.md b/docs/snippets/aware_format.md new file mode 100644 index 00000000..c3964660 --- /dev/null +++ b/docs/snippets/aware_format.md @@ -0,0 +1,606 @@ +If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn't need to modify this format (described below). + +Remember that a format maps and transforms columns in your raw data stream to the [mandatory columns RAPIDS needs](../mandatory-phone-format). + +The yaml file that describes the format of this data stream is at: +```bash +src/data/streams/aware_csv/format.yaml +``` + +For some sensors, we need to transform iOS data into Android format; you can refer to [OS complex mapping](../../datastreams/add-new-data-streams/#os-complex-mapping) to learn how this works. + +!!! 
hint + The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. + +??? info "PHONE_ACCELEROMETER" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | DOUBLE_VALUES_0 | double_values_0 | + | DOUBLE_VALUES_1 | double_values_1 | + | DOUBLE_VALUES_2 | double_values_2 | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + Same as ANDROID + +??? info "PHONE_ACTIVITY_RECOGNITION" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | ACTIVITY_NAME | activity_name | + | ACTIVITY_TYPE | activity_type | + | CONFIDENCE | confidence | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |-----------------|-----------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | ACTIVITY_NAME | FLAG_TO_MUTATE | + | ACTIVITY_TYPE | FLAG_TO_MUTATE | + | CONFIDENCE | FLAG_TO_MUTATE | + + **MUTATION** + + - **COLUMN_MAPPINGS** + + | Script column | Stream column | + |-----------------|-----------------| + | ACTIVITIES | activities | + | CONFIDENCE | confidence | + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R + ``` + + + !!! 
note + For RAPIDS columns of `ACTIVITY_NAME` and `ACTIVITY_TYPE`: + + - if stream's `activities` field is automotive, set `ACTIVITY_NAME` = in_vehicle and `ACTIVITY_TYPE` = 0 + - if stream's `activities` field is cycling, set `ACTIVITY_NAME` = on_bicycle and `ACTIVITY_TYPE` = 1 + - if stream's `activities` field is walking, set `ACTIVITY_NAME` = walking and `ACTIVITY_TYPE` = 7 + - if stream's `activities` field is running, set `ACTIVITY_NAME` = running and `ACTIVITY_TYPE` = 8 + - if stream's `activities` field is stationary, set `ACTIVITY_NAME` = still and `ACTIVITY_TYPE` = 3 + - if stream's `activities` field is unknown, set `ACTIVITY_NAME` = unknown and `ACTIVITY_TYPE` = 4 + + For RAPIDS `CONFIDENCE` column: + + - if stream's `confidence` field is 0, set `CONFIDENCE` = 0 + - if stream's `confidence` field is 1, set `CONFIDENCE` = 50 + - if stream's `confidence` field is 2, set `CONFIDENCE` = 100 + + +??? info "PHONE_APPLICATIONS_CRASHES" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |--------------------|--------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | PACKAGE_NAME | package_name | + | APPLICATION_NAME | application_name | + | APPLICATION_VERSION| application_version| + | ERROR_SHORT | error_short | + | ERROR_LONG | error_long | + | ERROR_CONDITION | error_condition | + | IS_SYSTEM_APP | is_system_app | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + This sensor is not supported by iOS devices. + + +??? 
info "PHONE_APPLICATIONS_FOREGROUND" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |--------------------|--------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | PACKAGE_NAME | package_name | + | APPLICATION_NAME | application_name | + | IS_SYSTEM_APP | is_system_app | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + This sensor is not supported by iOS devices. + +??? info "PHONE_APPLICATIONS_NOTIFICATIONS" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |--------------------|--------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | PACKAGE_NAME | package_name | + | APPLICATION_NAME | application_name | + | TEXT | text | + | SOUND | sound | + | VIBRATE | vibrate | + | DEFAULTS | defaults | + | FLAGS | flags | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + This sensor is not supported by iOS devices. + +??? 
info "PHONE_BATTERY" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | BATTERY_STATUS | battery_status | + | BATTERY_LEVEL | battery_level | + | BATTERY_SCALE | battery_scale | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS Client V1" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | BATTERY_STATUS | FLAG_TO_MUTATE | + | BATTERY_LEVEL | battery_level | + | BATTERY_SCALE | battery_scale | + + **MUTATION** + + - **COLUMN_MAPPINGS** + + | Script column | Stream column | + |----------------------|---------------------| + | BATTERY_STATUS | battery_status | + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/phone/aware/battery_ios_unification.R + ``` + + !!! note + For RAPIDS `BATTERY_STATUS` column: + + - if stream's `battery_status` field is 3, set `BATTERY_STATUS` = 5 (full status) + - if stream's `battery_status` field is 1, set `BATTERY_STATUS` = 3 (discharge) + + === "IOS Client V2" + + Same as ANDROID + + +??? info "PHONE_BLUETOOTH" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | BT_ADDRESS | bt_address | + | BT_NAME | bt_name | + | BT_RSSI | bt_rssi | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + This sensor is not supported by iOS devices. + + +??? 
info "PHONE_CALLS" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | CALL_TYPE | call_type | + | CALL_DURATION | call_duration | + | TRACE | trace | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | CALL_TYPE | FLAG_TO_MUTATE | + | CALL_DURATION | call_duration | + | TRACE | trace | + + **MUTATION** + + - **COLUMN_MAPPINGS** + + | Script column | Stream column | + |----------------------|---------------------| + | CALL_TYPE | call_type | + + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/phone/aware/calls_ios_unification.R + ``` + + !!! note + + We transform iOS call logs into Android's format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android's events: 1=incoming, 2=outgoing, 3=missed. 
+ + We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): + + - Search for the disconnected (4) status as it is common to all calls + - Group all events that preceded every status 4 + - We convert every 1,2,4 (or 2,1,4) sequence to an incoming call + - We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call + - We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) + - We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) + + **Tested with an Android (OnePlus 7T) and an iPhone XR** + + |Call type | Android (duration) | iOS (duration) | New Rule| + |---------|----------|--------|------| + |Outgoing missed ended by me | 2 (0) | 3,4 (0,X) | 3,4 is converted to 2 with duration 0| + |Outgoing missed ended by them|2(0)|3,2,4 (0,X,X2)| 3,2,4 is converted to 2 with duration X2*| + |Incoming missed ended by me|NA**|1,4 (0,X)|1,4 is converted to 3 with duration 0| + |Incoming missed ended by them|3(0)|1,4 (0,X)|1,4 is converted to 3 with duration 0| + |Outgoing answered|2(X excluding dialing time)|3,2,4 (0,X,X2)|3,2,4 is converted to 2 with duration X2| + |Incoming answered|1(X excluding dialing time)|1,2,4 (0,X,X2)|1,2,4 is converted to 1 with duration X2| + + .* There is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). + + .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. + + +??? 
info "PHONE_CONVERSATION" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | DOUBLE_ENERGY | double_energy | + | INFERENCE | inference | + | DOUBLE_CONVO_START | double_convo_start | + | DOUBLE_CONVO_END | double_convo_end | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | DOUBLE_ENERGY | double_energy | + | INFERENCE | inference | + | DOUBLE_CONVO_START | FLAG_TO_MUTATE | + | DOUBLE_CONVO_END | FLAG_TO_MUTATE | + + **MUTATION** + + - **COLUMN_MAPPINGS** + + | Script column | Stream column | + |----------------------|---------------------| + | DOUBLE_CONVO_START | double_convo_start | + | DOUBLE_CONVO_END | double_convo_end | + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R + ``` + + !!! note + For RAPIDS columns of `DOUBLE_CONVO_START` and `DOUBLE_CONVO_END`: + + - if stream's `double_convo_start` field is smaller than 9999999999, it is in seconds instead of milliseconds. Set `DOUBLE_CONVO_START` = 1000 * `double_convo_start`. + - if stream's `double_convo_end` field is smaller than 9999999999, it is in seconds instead of milliseconds. Set `DOUBLE_CONVO_END` = 1000 * `double_convo_end`. + + +??? 
info "PHONE_KEYBOARD" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | PACKAGE_NAME | package_name | + | BEFORE_TEXT | before_text | + | CURRENT_TEXT | current_text | + | IS_PASSWORD | is_password | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + This sensor is not supported by iOS devices. + + +??? info "PHONE_LIGHT" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | DOUBLE_LIGHT_LUX | double_light_lux | + | ACCURACY | accuracy | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + This sensor is not supported by iOS devices. + + +??? info "PHONE_LOCATIONS" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | DOUBLE_LATITUDE | double_latitude | + | DOUBLE_LONGITUDE | double_longitude | + | DOUBLE_BEARING | double_bearing | + | DOUBLE_SPEED | double_speed | + | DOUBLE_ALTITUDE | double_altitude | + | PROVIDER | provider | + | ACCURACY | accuracy | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + Same as ANDROID + + +??? info "PHONE_LOG" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | LOG_MESSAGE | log_message | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + Same as ANDROID + + +??? 
info "PHONE_MESSAGES" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | MESSAGE_TYPE | message_type | + | TRACE | trace | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + This sensor is not supported by iOS devices. + + +??? info "PHONE_SCREEN" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | SCREEN_STATUS | screen_status | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | SCREEN_STATUS | FLAG_TO_MUTATE | + + **MUTATION** + + - **COLUMN_MAPPINGS** + + | Script column | Stream column | + |----------------------|---------------------| + | SCREEN_STATUS | screen_status | + + - **SCRIPTS** + + ```bash + src/data/streams/mutations/phone/aware/screen_ios_unification.R + ``` + + !!! note + For `SCREEN_STATUS` RAPIDS column: + + - if stream's `screen_status` field is 2 (lock episode), set `SCREEN_STATUS` = 0 (off episode). + + +??? info "PHONE_WIFI_CONNECTED" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | MAC_ADDRESS | mac_address | + | SSID | ssid | + | BSSID | bssid | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + Same as ANDROID + + +??? 
info "PHONE_WIFI_VISIBLE" + + === "ANDROID" + + **RAPIDS_COLUMN_MAPPINGS** + + | RAPIDS column | Stream column | + |----------------------|---------------------| + | TIMESTAMP | timestamp | + | DEVICE_ID | device_id | + | SSID | ssid | + | BSSID | bssid | + | SECURITY | security | + | FREQUENCY | frequency | + | RSSI | rssi | + + **MUTATION** + + - **COLUMN_MAPPINGS** (None) + - **SCRIPTS** (None) + + === "IOS" + + This sensor is not supported by iOS devices. + diff --git a/mkdocs.yml b/mkdocs.yml index bb84cc52..87f2627f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -85,6 +85,7 @@ nav: - Introduction: datastreams/data-streams-introduction.md - Phone: - aware_mysql: datastreams/aware-mysql.md + - aware_csv: datastreams/aware-csv.md - Mandatory Phone Format: datastreams/mandatory-phone-format.md - Fitbit: - fitbitjson_mysql: datastreams/fitbitjson-mysql.md diff --git a/src/data/datetime/assign_to_time_segment.R b/src/data/datetime/assign_to_time_segment.R index 7b260c07..09f27b14 100644 --- a/src/data/datetime/assign_to_time_segment.R +++ b/src/data/datetime/assign_to_time_segment.R @@ -80,7 +80,8 @@ assign_to_time_segment <- function(sensor_data, time_segments, time_segments_typ mutate(data = map2(data, local_timezone, assign_rows_to_segments_frequency, time_segments)) %>% unnest(cols = data) %>% arrange(timestamp) %>% - select(-local_time_obj) + select(-local_time_obj) %>% + ungroup() return(sensor_data) @@ -171,5 +172,5 @@ assign_to_time_segment <- function(sensor_data, time_segments, time_segments_typ arrange(timestamp) } - return(sensor_data) + return(sensor_data %>% ungroup()) } \ No newline at end of file diff --git a/src/data/datetime/readable_datetime.R b/src/data/datetime/readable_datetime.R index 30dc17f2..117c94e6 100644 --- a/src/data/datetime/readable_datetime.R +++ b/src/data/datetime/readable_datetime.R @@ -61,7 +61,8 @@ create_mising_temporal_column <- function(data, device_type){ mutate(data = map2(data, local_timezone, function(nested_data, tz){ 
return(nested_data %>% mutate(timestamp = as.numeric(ymd_hms(local_date_time, tz=tz)) * 1000) %>% drop_na(timestamp)) })) %>% - unnest(cols = everything())) + unnest(cols = everything())) %>% + ungroup() } else { # For the rest of devices we infere local date time from timestamp if(nrow(data) == 0) @@ -72,7 +73,8 @@ create_mising_temporal_column <- function(data, device_type){ mutate(data = map2(data, local_timezone, function(nested_data, tz){ return(nested_data %>% mutate(local_date_time = format(as_datetime(timestamp / 1000, tz=tz), format="%Y-%m-%d %H:%M:%S")) %>% drop_na(local_date_time) ) })) %>% - unnest(cols = everything())) + unnest(cols = everything())) %>% + ungroup() } } @@ -118,6 +120,7 @@ readable_datetime <- function(){ output <- split_local_date_time(output) output <- assign_to_time_segment(output, time_segments, time_segments_type, include_past_periodic_segments) output <- filter_wanted_dates(output, participant_file, device_type) + output <- output %>% arrange(timestamp) write_csv(output, snakemake@output[[1]]) } diff --git a/src/data/streams/aware_csv/container.R b/src/data/streams/aware_csv/container.R new file mode 100644 index 00000000..2b6a20b1 --- /dev/null +++ b/src/data/streams/aware_csv/container.R @@ -0,0 +1,77 @@ +# if you need a new package, you should add it with renv::install(package) so your renv venv is updated +library(readr) + +# This file gets executed for each PHONE_SENSOR of each participant +# If you are connecting to a database the env file containing its credentials is available at "./.env" +# If you are reading a CSV file instead of a DB table, the @param sensor_container will contain the file path as set in config.yaml +# You are not bound to databases or files, you can query a web API or whatever data source you need. + +#' @description +#' RAPIDS allows users to use the keyword "infer" (previously "multiple") to automatically infer the mobile Operating System a device was running.
+#' If you have a way to infer the OS of a device ID, implement this function. For example, for AWARE data we use the "aware_device" table.
+#'
+#' If you don't have a way to infer the OS, call stop("Error Message") so other users know they can't use "infer" or the inference failed,
+#' and they have to assign the OS manually in the participant file
+#'
+#' @param stream_parameters The PHONE_STREAM_PARAMETERS key in config.yaml. If you need specific parameters add them there.
+#' @param device A device ID string
+#' @return A single string with the OS the device ran, "android" or "ios"
+
+infer_device_os <- function(stream_parameters, device){
+  if(!dir.exists(stream_parameters$FOLDER))
+    stop("[PHONE_DATA_STREAMS][aware_csv][FOLDER] does not exist: ", stream_parameters$FOLDER)
+  data_file <- file.path(stream_parameters$FOLDER, "aware_device.csv")
+
+  if(!file.exists(data_file))
+    stop("If you want to infer the OS of a smartphone using aware_csv, you need to have a CSV file called aware_device.csv with a 'device_id' and 'brand' columns, but this file does not exist: ", data_file)
+
+  # Letting the user know what we are doing
+  message(paste0("Reading this CSV file: ", data_file))
+
+  # Keep only this device's rows per chunk; %in% never yields NA, so rows with a missing device_id are skipped instead of coming back as all-NA rows
+  os <- read_delim_chunked(data_file, escape_backslash = TRUE, delim = ",", escape_double = FALSE, quote = "\"",
+                           callback = DataFrameCallback$new(function(x, pos) x[x[["device_id"]] %in% device, c("brand", "device_id")] ), progress = TRUE, chunk_size = 50000)
+  # A NULL result is treated as an empty file: either way there is no row for this device
+  if(is.null(os) || nrow(os) == 0)
+    stop(paste("We cannot infer the OS of the following device id because it does not exist in the aware_device table:", device))
+  # A device can be listed more than once; the first matching row decides so exactly one string is returned
+  brand <- os[["brand"]][[1]]
+  if(is.na(brand))
+    stop(paste("We cannot infer the OS of the following device id because its brand is missing in the aware_device table:", device))
+  return(if(brand == "iPhone") "ios" else "android")
+}
+
+
+#' @description
+#' Gets the sensor data for a specific device id from a database table, file or whatever source you want to query
+#'
+#' @param stream_parameters The PHONE_STREAM_PARAMETERS key in config.yaml.
If you need specific parameters add them there.
+#' @param device A device ID string
+#' @param sensor The sensor being pulled (presumably a PHONE_SENSOR key; it is not used by this container)
+#' @param sensor_container database table or file containing the sensor data for all participants. This is the PHONE_SENSOR[TABLE] key in config.yaml
+#' @param columns the columns needed from this sensor (we recommend to only return these columns instead of every column in sensor_container)
+#' @return A dataframe with the sensor data for device
+
+pull_data <- function(stream_parameters, device, sensor, sensor_container, columns){
+  if(!dir.exists(stream_parameters$FOLDER))
+    stop("[PHONE_DATA_STREAMS][aware_csv][FOLDER] does not exist: ", stream_parameters$FOLDER)
+  data_file <- file.path(stream_parameters$FOLDER, sensor_container)
+
+  if(!file.exists(data_file))
+    stop("The data container should be a CSV file but it does not exist: ", data_file)
+
+  if(!endsWith(data_file, ".csv"))
+    stop("The data container should be a CSV file: ", data_file)
+
+  # Letting the user know what we are doing
+  message(paste0("Reading this CSV file: ", data_file))
+
+  # Filter each chunk down to this device and the requested columns; %in% never yields NA, so rows with a missing device id are not returned as all-NA rows
+  sensor_data <- read_delim_chunked(data_file, escape_backslash = TRUE, delim = ",", escape_double = FALSE, quote = "\"",
+                                    callback = DataFrameCallback$new(function(x, pos) x[x[[columns$DEVICE_ID]] %in% device, unlist(columns, use.names = FALSE)] ), progress = TRUE, chunk_size = 50000)
+  if(is.null(sensor_data)) # empty file: fall back to a plain read so the expected columns are still returned
+    sensor_data <- read.csv(data_file) %>% select(unlist(columns, use.names = FALSE))
+  if(nrow(sensor_data) == 0)
+    warning("The device '", device,"' did not have data in ", sensor_container)
+  return(sensor_data)
+}
diff --git a/src/data/streams/aware_csv/format.yaml b/src/data/streams/aware_csv/format.yaml
new file mode 100644
index 00000000..ee0bd0c4
--- /dev/null
+++ b/src/data/streams/aware_csv/format.yaml
@@ -0,0 +1,315 @@
+PHONE_ACCELEROMETER:
+  ANDROID:
+ RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_VALUES_0: double_values_0 + DOUBLE_VALUES_1: double_values_1 + DOUBLE_VALUES_2: double_values_2 + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_VALUES_0: double_values_0 + DOUBLE_VALUES_1: double_values_1 + DOUBLE_VALUES_2: double_values_2 + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_ACTIVITY_RECOGNITION: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + ACTIVITY_NAME: activity_name + ACTIVITY_TYPE: activity_type + CONFIDENCE: confidence + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + ACTIVITY_NAME: FLAG_TO_MUTATE + ACTIVITY_TYPE: FLAG_TO_MUTATE + CONFIDENCE: FLAG_TO_MUTATE + MUTATION: + COLUMN_MAPPINGS: + ACTIVITIES: activities + CONFIDENCE: confidence + SCRIPTS: # List any python or r scripts that mutate your raw data + - "src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R" + +PHONE_APPLICATIONS_CRASHES: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + PACKAGE_NAME: package_name + APPLICATION_NAME: application_name + APPLICATION_VERSION: application_version + ERROR_SHORT: error_short + ERROR_LONG: error_long + ERROR_CONDITION: error_condition + IS_SYSTEM_APP: is_system_app + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_APPLICATIONS_FOREGROUND: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + PACKAGE_NAME: package_name + APPLICATION_NAME: application_name + IS_SYSTEM_APP: is_system_app + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that 
mutate your raw data + +PHONE_APPLICATIONS_NOTIFICATIONS: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + PACKAGE_NAME: package_name + APPLICATION_NAME: application_name + TEXT: text + SOUND: sound + VIBRATE: vibrate + DEFAULTS: defaults + FLAGS: flags + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_BATTERY: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + BATTERY_STATUS: battery_status + BATTERY_LEVEL: battery_level + BATTERY_SCALE: battery_scale + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + BATTERY_STATUS: FLAG_TO_MUTATE + BATTERY_LEVEL: battery_level + BATTERY_SCALE: battery_scale + MUTATION: + COLUMN_MAPPINGS: + BATTERY_STATUS: battery_status + SCRIPTS: + - "src/data/streams/mutations/phone/aware/battery_ios_unification.R" + +PHONE_BLUETOOTH: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + BT_ADDRESS: bt_address + BT_NAME: bt_name + BT_RSSI: bt_rssi + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_CALLS: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + CALL_TYPE: call_type + CALL_DURATION: call_duration + TRACE: trace + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + CALL_TYPE: FLAG_TO_MUTATE + CALL_DURATION: call_duration + TRACE: trace + MUTATION: + COLUMN_MAPPINGS: + CALL_TYPE: call_type + SCRIPTS: + - "src/data/streams/mutations/phone/aware/calls_ios_unification.R" + +PHONE_CONVERSATION: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_ENERGY: double_energy + INFERENCE: inference + 
DOUBLE_CONVO_START: double_convo_start + DOUBLE_CONVO_END: double_convo_end + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_ENERGY: double_energy + INFERENCE: inference + DOUBLE_CONVO_START: double_convo_start + DOUBLE_CONVO_END: double_convo_end + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - "src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R" + +PHONE_KEYBOARD: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + PACKAGE_NAME: package_name + BEFORE_TEXT: before_text + CURRENT_TEXT: current_text + IS_PASSWORD: is_password + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_LIGHT: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_LIGHT_LUX: double_light_lux + ACCURACY: accuracy + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_LOCATIONS: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_LATITUDE: double_latitude + DOUBLE_LONGITUDE: double_longitude + DOUBLE_BEARING: double_bearing + DOUBLE_SPEED: double_speed + DOUBLE_ALTITUDE: double_altitude + PROVIDER: provider + ACCURACY: accuracy + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + DOUBLE_LATITUDE: double_latitude + DOUBLE_LONGITUDE: double_longitude + DOUBLE_BEARING: double_bearing + DOUBLE_SPEED: double_speed + DOUBLE_ALTITUDE: double_altitude + PROVIDER: provider + ACCURACY: accuracy + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_LOG: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: 
timestamp + DEVICE_ID: device_id + LOG_MESSAGE: log_message + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + LOG_MESSAGE: log_message + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_MESSAGES: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + MESSAGE_TYPE: message_type + TRACE: trace + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_SCREEN: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + SCREEN_STATUS: screen_status + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + SCREEN_STATUS: FLAG_TO_MUTATE + MUTATION: + COLUMN_MAPPINGS: + SCREEN_STATUS: screen_status + SCRIPTS: # List any python or r scripts that mutate your raw data + - "src/data/streams/mutations/phone/aware/screen_ios_unification.R" + +PHONE_WIFI_CONNECTED: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + MAC_ADDRESS: mac_address + SSID: ssid + BSSID: bssid + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + MAC_ADDRESS: mac_address + SSID: ssid + BSSID: bssid + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + +PHONE_WIFI_VISIBLE: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + SSID: ssid + BSSID: bssid + SECURITY: security + FREQUENCY: frequency + RSSI: rssi + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + diff --git a/src/data/streams/fitbitjson_csv/container.R 
b/src/data/streams/fitbitjson_csv/container.R index 79165ef6..0cb49fca 100644 --- a/src/data/streams/fitbitjson_csv/container.R +++ b/src/data/streams/fitbitjson_csv/container.R @@ -24,8 +24,8 @@ pull_data <- function(stream_parameters, device, sensor, sensor_container, colum # Letting the user know what we are doing message(paste0("Reading this CSV file: ", data_file)) - sensor_data <- read_csv_chunked(data_file, - callback = DataFrameCallback$new(function(x, pos) subset(x,x[[columns$DEVICE_ID]] == device, select = unlist(columns))), progress = T, chunk_size = 50000) + sensor_data <- read_delim_chunked(data_file, escape_backslash = TRUE, delim = ",", escape_double = FALSE, quote = "\"", + callback = DataFrameCallback$new(function(x, pos) x[x[[columns$DEVICE_ID]] == device, unlist(columns, use.names = FALSE)] ), progress = T, chunk_size = 50000) if(is.null(sensor_data)) # emtpy file sensor_data <- read.csv(data_file) diff --git a/src/data/streams/fitbitparsed_csv/container.R b/src/data/streams/fitbitparsed_csv/container.R index dca02566..f010e60b 100644 --- a/src/data/streams/fitbitparsed_csv/container.R +++ b/src/data/streams/fitbitparsed_csv/container.R @@ -24,8 +24,8 @@ pull_data <- function(stream_parameters, device, sensor, sensor_container, colum # Letting the user know what we are doing message(paste0("Reading this CSV file: ", data_file)) - sensor_data <- read_csv_chunked(data_file, - callback = DataFrameCallback$new(function(x, pos) subset(x,x[[columns$DEVICE_ID]] == device, select = unlist(columns))), progress = T, chunk_size = 50000) + sensor_data <- read_delim_chunked(data_file, escape_backslash = TRUE, delim = ",", escape_double = FALSE, quote = "\"", + callback = DataFrameCallback$new(function(x, pos) x[x[[columns$DEVICE_ID]] == device, unlist(columns, use.names = FALSE)] ), progress = T, chunk_size = 50000) if(is.null(sensor_data)) # emtpy file sensor_data <- read.csv(data_file) diff --git 
a/src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R b/src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R index 5690f877..a6226c60 100644 --- a/src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R +++ b/src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R @@ -25,9 +25,8 @@ clean_ios_activity_column <- function(ios_gar){ } unify_ios_activity_recognition <- function(ios_gar){ - # We only need to unify Google Activity Recognition data for iOS # discard rows where activities column is blank - ios_gar <- ios_gar[-which(ios_gar$activities == ""), ] + ios_gar <- ios_gar %>% filter(!is.na(activities) & activities != "" ) # clean "activities" column of ios_gar ios_gar <- clean_ios_activity_column(ios_gar) diff --git a/src/data/streams/pull_phone_data.R b/src/data/streams/pull_phone_data.R index e9478a93..b0ba2244 100644 --- a/src/data/streams/pull_phone_data.R +++ b/src/data/streams/pull_phone_data.R @@ -163,7 +163,7 @@ pull_phone_data <- function(){ participant_data <- rbind(participant_data, mutated_data) } - + participant_data <- participant_data %>% arrange(timestamp) write_csv(participant_data, output_data_file) } diff --git a/src/data/streams/pull_wearable_data.R b/src/data/streams/pull_wearable_data.R index 990bba41..50184a3b 100644 --- a/src/data/streams/pull_wearable_data.R +++ b/src/data/streams/pull_wearable_data.R @@ -114,6 +114,10 @@ pull_wearable_data_main <- function(){ participant_data <- rbind(participant_data, mutated_data) } + if(device_type == "fitbit") + participant_data <- participant_data %>% arrange(local_date_time) + else + participant_data <- participant_data %>% arrange(timestamp) write_csv(participant_data, output_data_file) }