diff --git a/rules/preprocessing.snakefile b/rules/preprocessing.snakefile
index 20b92301..1eae30f8 100644
--- a/rules/preprocessing.snakefile
+++ b/rules/preprocessing.snakefile
@@ -30,4 +30,15 @@ rule phone_valid_sensed_days:
     output:
         "data/interim/{pid}/phone_valid_sensed_days.csv"
     script:
-        "../src/data/phone_valid_sensed_days.R"
\ No newline at end of file
+        "../src/data/phone_valid_sensed_days.R"
+
+rule unify_ios_android:
+    input:
+        sensor_data = "data/raw/{pid}/{sensor}_with_datetime.csv",
+        participant_info = "data/external/{pid}"
+    params:
+        sensor = "{sensor}"
+    output:
+        "data/raw/{pid}/{sensor}_with_datetime_unified.csv"
+    script:
+        "../src/data/unify_ios_android.R"
\ No newline at end of file
diff --git a/src/data/unify_ios_android.R b/src/data/unify_ios_android.R
new file mode 100644
index 00000000..7136421f
--- /dev/null
+++ b/src/data/unify_ios_android.R
@@ -0,0 +1,108 @@
+source("packrat/init.R")
+
+library(dplyr)
+
+# Collapse the iOS call event log into one row per call (one per `trace`),
+# recoded to Android's call types so both platforms share one schema.
+#
+# Android call types: 1 = incoming, 2 = outgoing, 3 = missed
+# iOS call statuses: 1 = incoming, 2 = connected, 3 = dialing, 4 = disconnected
+# iOS status sequences map to Android call types as follows:
+#   (1,2,4) -> incoming (1)
+#   (3,2,4) -> outgoing (2)
+#   (1,4) or (3,4) -> missed (3)
+# Sometimes (due to a possible bug in Aware) events get logged on the exact
+# same timestamp, so a 3-item sequence can be 2,3,4 instead of 3,2,4.
+# Even though iOS stores the ringing/dialing duration of missed calls, it is
+# set to 0 to match Android.
+#
+# ios_calls: data frame with the columns used below (trace, timestamp,
+#   call_type, call_duration, utc_date_time and the local_* columns).
+# Returns: one row per trace whose status sequence matches one of the known
+#   patterns, sorted by timestamp; every other trace is dropped.
+unify_ios_calls <- function(ios_calls) {
+  ios_calls %>%
+    arrange(trace, timestamp, call_type) %>%
+    group_by(trace) %>%
+    # search for the disconnect event, as it is common to outgoing, received
+    # and missed calls
+    # NOTE(review): completed_call is not read by the summarise() below, which
+    # collapses by trace only; confirm whether it was meant as a grouping key
+    mutate(
+      completed_call = ifelse(call_type == 4, 2, 0),
+      # assign the same ID to all events before a 4
+      completed_call = cumsum(
+        c(1, head(completed_call, -1) != tail(completed_call, -1))
+      ),
+      # hack to match ID of last event (4) to that of the previous rows
+      completed_call = ifelse(
+        call_type == 4, completed_call - 1, completed_call
+      )
+    ) %>%
+    summarise(
+      # collapse all events of a trace into one sequence string
+      call_type_sequence = paste(call_type, collapse = ","),
+      # use this if Android measures calls' duration from pick up to hang up
+      # duration = last(call_duration),
+      # sanity check, timestamp_diff should be equal or close to duration sum
+      # timestamp_diff = trunc((last(timestamp) - first(timestamp)) / 1000)
+      # use this if Android measures calls' duration from dialing/ringing to
+      # hang up
+      call_duration = sum(call_duration),
+      timestamp = first(timestamp),
+      utc_date_time = first(utc_date_time),
+      local_date_time = first(local_date_time),
+      local_date = first(local_date),
+      local_time = first(local_time),
+      local_hour = first(local_hour),
+      local_minute = first(local_minute),
+      local_day_segment = first(local_day_segment)
+    ) %>%
+    mutate(
+      call_type = case_when(
+        call_type_sequence %in% c("1,2,4", "2,1,4") ~ 1, # incoming
+        call_type_sequence == "1,4" ~ 3, # missed
+        call_type_sequence %in% c("3,2,4", "2,3,4") ~ 2, # outgoing
+        # outgoing missed: temp state so its duration can be set to 0 below
+        call_type_sequence == "3,4" ~ 4,
+        # any other sequence (e.g. without a disconnect event) is discarded
+        TRUE ~ -1
+      ),
+      # assign a duration of 0 to incoming and outgoing missed calls
+      call_duration = ifelse(call_type %in% c(3, 4), 0, call_duration),
+      # get rid of the temp missed call type, set to 3 to match Android
+      call_type = ifelse(call_type == 4, 3, call_type)
+    ) %>%
+    # discard sequences that matched none of the known patterns
+    filter(call_type > 0) %>%
+    ungroup() %>%
+    arrange(timestamp)
+}
+
+participant_info <- snakemake@input[["participant_info"]]
+sensor <- snakemake@params[["sensor"]]
+
+# the platform is read from the second line of the participant file
+participant_lines <- readLines(participant_info, n = 2)
+if (length(participant_lines) < 2) {
+  stop("Missing platform on line 2 of ", participant_info, call. = FALSE)
+}
+platform <- trimws(participant_lines[[2]])
+
+# fail loudly instead of finishing without writing the declared output file
+if (sensor != "calls") {
+  stop("No unification implemented for sensor: ", sensor, call. = FALSE)
+}
+if (!platform %in% c("android", "ios")) {
+  stop("Unknown platform: ", platform, call. = FALSE)
+}
+
+sensor_data <- read.csv(
+  snakemake@input[["sensor_data"]],
+  stringsAsFactors = FALSE
+)
+# Android calls are written unchanged; iOS calls are unified first
+if (platform == "ios") {
+  sensor_data <- unify_ios_calls(sensor_data)
+}
+write.csv(sensor_data, snakemake@output[[1]], row.names = FALSE)