# 2020-10-23 16:41:00 +02:00
library ( " dplyr" , warn.conflicts = F )
# 2020-06-30 23:34:18 +02:00
library ( stringr )
unify_ios_screen <- function(ios_screen) {
  # Make iOS screen events look like Android ones.
  #
  # Android episodes are processed from UNLOCK to OFF, while iOS episodes are
  # processed from UNLOCK (3) to LOCKED (2). Recoding LOCKED as OFF (2 -> 0)
  # lets the Android code handle iOS data too.
  #
  # ios_screen: data frame with a `screen_status` column, in event order.
  # Returns the rows that belong to consecutive (3, 2) pairs, with 2 recoded
  # as 0.

  # A row with status 3 survives only if the next row is a 2, and a row with
  # status 2 survives only if the previous row is a 3.
  paired_events <- filter(
    ios_screen,
    (screen_status == 3 & lead(screen_status) == 2) |
      (screen_status == 2 & lag(screen_status) == 3)
  )

  mutate(
    paired_events,
    screen_status = replace(screen_status, screen_status == 2, 0)
  )
}
unify_ios_battery <- function(ios_battery) {
  # Recode battery_status for data logged by iOS client V1.
  #
  # Only V1 data needs unifying; V2 does it out-of-the-box. V1 data is
  # recognised by having no rows where battery_status equals 4.
  #
  # ios_battery: data frame with a `battery_status` column.
  # Returns ios_battery untouched when a status 4 is present, otherwise with
  # battery_status recoded (3 -> 5, then 1 -> 3).
  rows_with_status_four <- nrow(filter(ios_battery, battery_status == 4))
  if (rows_with_status_four != 0) {
    return(ios_battery)
  }

  # The order matters: 3 -> 5 runs first so that the values produced by
  # 1 -> 3 are not recoded a second time.
  mutate(
    ios_battery,
    battery_status = replace(battery_status, battery_status == 3, 5),
    battery_status = replace(battery_status, battery_status == 1, 3)
  )
}
unify_ios_calls <- function(ios_calls) {
  # Collapse the iOS per-event call log into one row per call (per `trace`),
  # recoded with Android's call types.
  #
  # Android's call types: 1=incoming, 2=outgoing, 3=missed
  # iOS' call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected
  # iOS' call types based on call status:
  #   (1,2,4) = incoming = 1
  #   (3,2,4) = outgoing = 2
  #   (1,4)   = missed = 3, duration forced to 0
  #   (3,4)   = unanswered outgoing call, reported as outgoing = 2 with
  #             duration 0 (see https://github.com/carissalow/rapids/issues/79)
  # Sometimes (due to a possible bug in Aware) events get logged on the exact
  # same timestamp, thus 3-item sequences can also show up as 2,1,4 or 2,3,4.
  # Even though iOS stores the duration of ringing/dialing for missed calls,
  # we set it to 0 to match Android.
  #
  # ios_calls: data frame with at least the columns trace, timestamp,
  #   call_type and call_duration. When utc_date_time and local_date_time are
  #   present, the other local_* columns and assigned_segments summarised
  #   below must be present as well.
  # Returns an ungrouped data frame with one row per trace, sorted by
  # timestamp. Traces whose event sequence is not one of the sequences listed
  # above (e.g. no disconnect event) are discarded.
  ios_calls <- ios_calls %>%
    arrange(trace, timestamp, call_type) %>%
    group_by(trace) %>%
    # search for the disconnect event, as it is common to outgoing, received
    # and missed calls
    # NOTE(review): completed_call is not used for grouping and is dropped by
    # summarise() below; kept as-is in case the intent was to group by it.
    mutate(
      completed_call = ifelse(call_type == 4, 2, 0),
      # assign the same ID to all events before a 4
      completed_call = cumsum(c(1, head(completed_call, -1) != tail(completed_call, -1))),
      # hack to match ID of last event (4) to that of the previous rows
      completed_call = ifelse(call_type == 4, completed_call - 1, completed_call)
    )

  # We check utc_date_time and local_date_time exist because sometimes we call
  # this function from download_dataset to unify multi-platform participants.
  # At that point such time columns are missing.
  has_time_columns <- all(c("utc_date_time", "local_date_time") %in% colnames(ios_calls))

  # In both branches:
  #   call_duration = last(call_duration) is the duration from pick up to hang up
  #   call_duration = sum(call_duration) would be from dialing/ringing to hang up
  # The same definition is used in both branches so that durations do not
  # depend on where this function is called from.
  if (has_time_columns) {
    ios_calls <- ios_calls %>%
      summarise(
        # collapse all events of a trace into a single sequence string
        call_type_sequence = paste(call_type, collapse = ","),
        # sanity check, timestamp_diff should be equal or close to duration sum
        # timestamp_diff = trunc((last(timestamp) - first(timestamp)) / 1000)
        call_duration = last(call_duration),
        timestamp = first(timestamp),
        utc_date_time = first(utc_date_time),
        local_date_time = first(local_date_time),
        local_date = first(local_date),
        local_time = first(local_time),
        local_hour = first(local_hour),
        local_minute = first(local_minute),
        local_timezone = first(local_timezone),
        assigned_segments = first(assigned_segments)
      )
  } else {
    ios_calls <- ios_calls %>%
      summarise(
        call_type_sequence = paste(call_type, collapse = ","),
        call_duration = last(call_duration),
        timestamp = first(timestamp)
      )
  }

  ios_calls %>%
    mutate(
      call_type = case_when(
        call_type_sequence %in% c("1,2,4", "2,1,4") ~ 1, # incoming
        call_type_sequence == "1,4" ~ 3, # missed
        call_type_sequence %in% c("3,2,4", "2,3,4") ~ 2, # outgoing
        # outgoing missed, temporary state used to assign a duration of 0 below
        call_type_sequence == "3,4" ~ 4,
        # other, call sequences without a disconnect (4) event are discarded
        TRUE ~ -1
      ),
      # assign a duration of 0 to incoming and outgoing missed calls
      call_duration = ifelse(call_type == 3 | call_type == 4, 0, call_duration),
      # get rid of the temp missed call type, set to 2 to match Android.
      # See https://github.com/carissalow/rapids/issues/79
      call_type = ifelse(call_type == 4, 2, call_type)
    ) %>%
    # discard sequences that were not recognised above
    filter(call_type > 0) %>%
    ungroup() %>%
    arrange(timestamp)
}
clean_ios_activity_column <- function(ios_gar) {
  # Reduce the raw iOS `activities` column to a single activity label per row.
  #
  # ios_gar: data frame with an `activities` column.
  # Returns ios_gar with double quotes and square brackets removed from
  # `activities`, and the combination "stationary,automotive" collapsed to
  # "automotive".
  # Stops with an error that lists every other comma-separated combination
  # found, because there is no rule yet for which activity to keep.

  # Combinations of simultaneous activities we know how to resolve
  known_multiple_activities <- c("stationary,automotive")

  # Strip double quotes and square brackets,
  # e.g. ["stationary","automotive"] -> stationary,automotive
  activities <- gsub('("|\\[|\\])', "", ios_gar$activities)

  # Distinct values that still hold more than one activity
  has_multiple <- !is.na(activities) & grepl(",", activities, fixed = TRUE)
  existent_multiple_activities <- unique(activities[has_multiple])

  unknown_multiple_activities <- setdiff(existent_multiple_activities, known_multiple_activities)
  if (length(unknown_multiple_activities) > 0) {
    # Collapse first so the message is a single string listing every
    # offending combination, instead of the prefix repeated once per value.
    stop(paste0(
      "There are unknown combinations of ios activities, you need to implement the decision of the ones to keep: ",
      paste(unknown_multiple_activities, collapse = "; ")
    ))
  }

  ios_gar$activities <- gsub("stationary,automotive", "automotive", activities, fixed = TRUE)
  ios_gar
}
# 2020-10-19 21:07:12 +02:00
unify_ios_activity_recognition <- function(ios_gar) {
  # Make iOS activity recognition data compatible with the Android version.
  # (We only need to unify Google Activity Recognition data for iOS.)
  #
  # ios_gar: data frame with an `activities` column.
  # Returns ios_gar without blank-activity rows, with `activities` cleaned by
  # clean_ios_activity_column() and with two added columns, `activity_name`
  # and `activity_type`. Activities without a mapping below get NA.

  # Discard rows where the activities column is blank. A logical mask is used
  # on purpose: `df[-which(cond), ]` drops EVERY row when no row matches,
  # because which() returns integer(0) and `-integer(0)` selects nothing.
  # Rows with an NA activity are kept.
  is_blank <- !is.na(ios_gar$activities) & !nzchar(trimws(ios_gar$activities))
  ios_gar <- ios_gar[!is_blank, , drop = FALSE]

  # clean "activities" column of ios_gar
  ios_gar <- clean_ios_activity_column(ios_gar)

  # generate the "activity_name" and "activity_type" columns
  # NOTE(review): "unknown" has an activity_type (4) but no activity_name, so
  # its name is NA; confirm whether that is intended.
  ios_gar %>%
    mutate(
      activity_name = case_when(
        activities == "automotive" ~ "in_vehicle",
        activities == "cycling" ~ "on_bicycle",
        activities == "walking" ~ "walking",
        activities == "running" ~ "running",
        activities == "stationary" ~ "still"
      ),
      activity_type = case_when(
        activities == "automotive" ~ 0,
        activities == "cycling" ~ 1,
        activities == "walking" ~ 7,
        activities == "running" ~ 8,
        activities == "stationary" ~ 3,
        activities == "unknown" ~ 4
      )
    )
}
# 2020-08-11 22:18:06 +02:00
unify_ios_conversation <- function(conversation) {
  # Express double_convo_start and double_convo_end in milliseconds.
  #
  # Values smaller than or equal to 9999999999 are taken to be in seconds
  # instead of milliseconds and are multiplied by 1000.
  #
  # The conversion is done value by value, so rows that are already in
  # milliseconds are left untouched even when other rows are in seconds, and
  # NA timestamps stay NA instead of aborting the check.
  #
  # conversation: data frame with numeric double_convo_start and
  #   double_convo_end columns.
  # Returns conversation with both columns in milliseconds; a data frame
  # without rows is returned unchanged.
  largest_value_in_seconds <- 9999999999

  # Scale only the entries of x that are in seconds
  to_milliseconds <- function(x) {
    in_seconds <- !is.na(x) & x <= largest_value_in_seconds
    x[in_seconds] <- x[in_seconds] * 1000
    x
  }

  if (nrow(conversation) > 0) {
    conversation$double_convo_start <- to_milliseconds(conversation$double_convo_start)
    conversation$double_convo_end <- to_milliseconds(conversation$double_convo_end)
  }
  conversation
}
# 2020-06-30 23:34:18 +02:00
# This function is used in download_dataset.R
# 2020-10-21 01:12:01 +02:00
unify_raw_data <- function(dbEngine, sensor_table, sensor, timestamp_filter, aware_multiplatform_tables, device_ids, platforms) {
  # Download the raw data of one sensor for every device of a participant,
  # unify each device's data according to its platform and stack the results.
  #
  # dbEngine: database connection used with dbGetQuery()/dbGetInfo().
  # sensor_table: name of the table to query, or the android/ios pair of table
  #   names for activity recognition or conversation.
  # sensor: sensor name passed on to unify_data().
  # timestamp_filter: SQL fragment appended verbatim after the device_id
  #   condition of each query.
  # aware_multiplatform_tables: positions 1:2 hold the android and ios
  #   activity recognition tables, positions 3:4 the android and ios
  #   conversation tables.
  # device_ids: device ids of the participant.
  # platforms: "multiple", or the platform(s) of device_ids.
  # Returns the row-bound unified data of all devices. Devices whose table is
  # missing from the database are skipped with a warning.
  #
  # NOTE(review): queries are built by pasting device ids and table names into
  # SQL. This assumes those values come from trusted configuration; confirm.

  # If platforms is 'multiple', fetch each device_id's platform from
  # aware_device, otherwise, use those given by the user
  if (length(platforms) == 1 && platforms == "multiple") {
    devices_platforms <- dbGetQuery(
      dbEngine,
      paste0(
        "SELECT device_id,brand FROM aware_device WHERE device_id IN ('",
        paste0(device_ids, collapse = "','"),
        "')"
      )
    ) %>%
      mutate(platform = ifelse(brand == "iPhone", "ios", "android"))
  } else {
    # stringsAsFactors = FALSE keeps platform a string; a factor would index
    # the table pairs below by its integer code instead of by name
    devices_platforms <- data.frame(device_id = device_ids, platform = platforms, stringsAsFactors = FALSE)
  }

  # Get existent tables in database
  db_name <- dbGetInfo(dbEngine)$dbname
  available_tables_in_db <- dbGetQuery(
    dbEngine,
    paste0("SELECT table_name FROM information_schema.tables WHERE table_schema='", db_name, "'")
  )[[1]]
  if (!any(sensor_table %in% available_tables_in_db)) {
    stop(paste0(
      "You requested data from these table(s) ", paste0(sensor_table, collapse = ", "),
      " but they don't exist in your database ", db_name
    ))
  }

  # Parse the table names for activity recognition and conversation plugins
  # because they are different between android and ios
  ar_tables <- setNames(aware_multiplatform_tables[1:2], c("android", "ios"))
  conversation_tables <- setNames(aware_multiplatform_tables[3:4], c("android", "ios"))

  participants_sensordata <- list()
  # seq_len() so that a participant without devices skips the loop
  # (1:0 would iterate over c(1, 0))
  for (i in seq_len(nrow(devices_platforms))) {
    device_id <- devices_platforms$device_id[[i]]
    platform <- as.character(devices_platforms$platform[[i]])

    # Handle special cases when tables for the same sensor have different
    # names for Android and iOS (AR and conversation)
    if (length(sensor_table) == 1) {
      device_table <- sensor_table
    } else if (all(sensor_table == ar_tables)) {
      device_table <- ar_tables[[platform]]
    } else if (all(sensor_table == conversation_tables)) {
      device_table <- conversation_tables[[platform]]
    } else {
      # Fail with a clear message instead of falling through with no table
      stop(paste0(
        "Cannot decide which table to query for ", sensor,
        ": sensor_table (", paste0(sensor_table, collapse = ", "),
        ") is neither a single table nor the activity recognition or conversation table pair"
      ))
    }

    if (device_table %in% available_tables_in_db) {
      query <- paste0("SELECT * FROM ", device_table, " WHERE device_id IN ('", device_id, "')", timestamp_filter)
      sensor_data <- unify_data(dbGetQuery(dbEngine, query), sensor, platform)
      participants_sensordata[[length(participants_sensordata) + 1]] <- sensor_data
    } else {
      warning(paste0(
        "Missing ", device_table, " table. We unified the data from ",
        paste0(devices_platforms$device_id, collapse = " and "),
        " but without records from this missing table for ", device_id
      ))
    }
  }

  bind_rows(participants_sensordata)
}
# This function is used in unify_ios_android.R and in the unify_raw_data function
# 2020-10-19 21:07:12 +02:00
unify_data <- function(sensor_data, sensor, platform) {
  # Dispatch sensor_data to the iOS unification function of its sensor.
  #
  # sensor_data: data frame with the raw data of one sensor.
  # sensor: sensor name, e.g. "phone_calls".
  # platform: platform of the device that logged sensor_data.
  # Returns sensor_data unified to the Android format when platform is "ios"
  # and the sensor has a unification function; otherwise sensor_data is
  # returned unchanged.

  # Only iOS data needs unifying
  if (!identical(as.character(platform), "ios")) {
    return(sensor_data)
  }

  # as.character() because switch() would treat a factor as an integer index
  switch(as.character(sensor),
    phone_calls = unify_ios_calls(sensor_data),
    phone_battery = unify_ios_battery(sensor_data),
    phone_activity_recognition = unify_ios_activity_recognition(sensor_data),
    phone_screen = unify_ios_screen(sensor_data),
    phone_conversation = unify_ios_conversation(sensor_data),
    # any other sensor is identical on both platforms
    sensor_data
  )
}