Migrate phone calls sensor to new data stream
parent
0e96f39599
commit
c1682d8cd3
|
@ -46,7 +46,6 @@ for provider in config["PHONE_CALLS"]["PROVIDERS"].keys():
|
||||||
if config["PHONE_CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["PHONE_CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_raw.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_raw.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime_unified.csv", pid=config["PIDS"]))
|
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_calls.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/phone_calls.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||||
|
|
|
@ -216,6 +216,81 @@ Stream columns named `FLAG_TO_MUTATE` means they are extracted based on the `MUT
|
||||||
This sensor is not supported by iOS devices.
|
This sensor is not supported by iOS devices.
|
||||||
|
|
||||||
|
|
||||||
|
??? info "PHONE_CALLS"
|
||||||
|
|
||||||
|
=== "ANDROID"
|
||||||
|
|
||||||
|
**RAPIDS_COLUMN_MAPPINGS**
|
||||||
|
|
||||||
|
| RAPIDS column | Stream column |
|
||||||
|
|----------------------|---------------------|
|
||||||
|
| TIMESTAMP | timestamp |
|
||||||
|
| DEVICE_ID | device_id |
|
||||||
|
| CALL_TYPE | call_type |
|
||||||
|
| CALL_DURATION | call_duration |
|
||||||
|
| TRACE | trace |
|
||||||
|
|
||||||
|
**MUTATION**
|
||||||
|
|
||||||
|
- **COLUMN_MAPPINGS** (None)
|
||||||
|
- **SCRIPTS** (None)
|
||||||
|
|
||||||
|
=== "IOS"
|
||||||
|
|
||||||
|
**RAPIDS_COLUMN_MAPPINGS**
|
||||||
|
|
||||||
|
| RAPIDS column | Stream column |
|
||||||
|
|----------------------|---------------------|
|
||||||
|
| TIMESTAMP | timestamp |
|
||||||
|
| DEVICE_ID | device_id |
|
||||||
|
| CALL_TYPE | FLAG_TO_MUTATE |
|
||||||
|
| CALL_DURATION | call_duration |
|
||||||
|
| TRACE | trace |
|
||||||
|
|
||||||
|
**MUTATION**
|
||||||
|
|
||||||
|
- **COLUMN_MAPPINGS**
|
||||||
|
|
||||||
|
| Script column | Stream column |
|
||||||
|
|----------------------|---------------------|
|
||||||
|
| CALL_TYPE | call_type |
|
||||||
|
|
||||||
|
|
||||||
|
- **SCRIPTS**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
src/data/streams/mutations/phone/aware/calls_ios_unification.R
|
||||||
|
```
|
||||||
|
|
||||||
|
!!! note
|
||||||
|
|
||||||
|
We transform iOS call logs into Android's format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android's events: 1=incoming, 2=outgoing, 3=missed.
|
||||||
|
|
||||||
|
We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below):
|
||||||
|
|
||||||
|
- Search for the disconnected (4) status as it is common to all calls
|
||||||
|
- Group all events that preceded every status 4
|
||||||
|
- We convert every 1,2,4 (or 2,1,4) sequence to an incoming call
|
||||||
|
- We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call
|
||||||
|
- We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing)
|
||||||
|
- We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up)
|
||||||
|
|
||||||
|
**Tested with an Android (OnePlus 7T) and an iPhone XR**
|
||||||
|
|
||||||
|
|Call type | Android (duration) | iOS (duration) | New Rule|
|
||||||
|
|---------|----------|--------|------|
|
||||||
|
|Outgoing missed ended by me | 2 (0) | 3,4 (0,X) | 3,4 is converted to 2 with duration 0|
|
||||||
|
|Outgoing missed ended by them|2(0)|3,2,4 (0,X,X2)| 3,2,4 is converted to 2 with duration X2*|
|
||||||
|
|Incoming missed ended by me|NA**|1,4 (0,X)|1,4 is converted to 3 with duration 0|
|
||||||
|
|Incoming missed ended by them|3(0)|1,4 (0,X)|1,4 is converted to 3 with duration 0|
|
||||||
|
|Outgoing answered|2(X excluding dialing time)|3,2,4 (0,X,X2)|3,2,4 is converted to 2 with duration X2|
|
||||||
|
|Incoming answered|1(X excluding dialing time)|1,2,4 (0,X,X2)|1,2,4 is converted to 1 with duration X2|
|
||||||
|
|
||||||
|
.* There is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered).
|
||||||
|
|
||||||
|
.** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant.
|
||||||
|
|
||||||
|
|
||||||
??? info "PHONE_CONVERSATION"
|
??? info "PHONE_CONVERSATION"
|
||||||
|
|
||||||
=== "ANDROID"
|
=== "ANDROID"
|
||||||
|
|
|
@ -57,6 +57,17 @@ This is a description of the format RAPIDS needs to process data for the followi
|
||||||
| BT_RSSI | The RSSI dB to the scanned device |
|
| BT_RSSI | The RSSI dB to the scanned device |
|
||||||
|
|
||||||
|
|
||||||
|
??? info "PHONE_CALLS"
|
||||||
|
|
||||||
|
| RAPIDS column | Description |
|
||||||
|
|--------------------|---------------------------------------------------------------------------|
|
||||||
|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged |
|
||||||
|
| DEVICE_ID | A string that uniquely identifies a device |
|
||||||
|
| CALL_TYPE | An integer that denotes call type: 1 = incoming, 2 = outgoing, 3 = missed |
|
||||||
|
| CALL_DURATION | Length of the call session |
|
||||||
|
| TRACE | SHA-1 one-way hash of the source/target of the call |
|
||||||
|
|
||||||
|
|
||||||
??? info "PHONE_CONVERSATION"
|
??? info "PHONE_CONVERSATION"
|
||||||
|
|
||||||
| RAPIDS column | Description |
|
| RAPIDS column | Description |
|
||||||
|
|
|
@ -16,7 +16,6 @@ Sensor parameters description for `[PHONE_CALLS]`:
|
||||||
```bash
|
```bash
|
||||||
- data/raw/{pid}/phone_calls_raw.csv
|
- data/raw/{pid}/phone_calls_raw.csv
|
||||||
- data/raw/{pid}/phone_calls_with_datetime.csv
|
- data/raw/{pid}/phone_calls_with_datetime.csv
|
||||||
- data/raw/{pid}/phone_calls_with_datetime_unified.csv
|
|
||||||
- data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv
|
- data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv
|
||||||
- data/processed/features/{pid}/phone_calls.csv
|
- data/processed/features/{pid}/phone_calls.csv
|
||||||
```
|
```
|
||||||
|
|
|
@ -264,7 +264,7 @@ rule phone_bluetooth_r_features:
|
||||||
|
|
||||||
rule calls_python_features:
|
rule calls_python_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/phone_calls_with_datetime_unified.csv",
|
sensor_data = "data/raw/{pid}/phone_calls_with_datetime.csv",
|
||||||
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
provider = lambda wildcards: config["PHONE_CALLS"]["PROVIDERS"][wildcards.provider_key.upper()],
|
provider = lambda wildcards: config["PHONE_CALLS"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
@ -277,7 +277,7 @@ rule calls_python_features:
|
||||||
|
|
||||||
rule calls_r_features:
|
rule calls_r_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/phone_calls_with_datetime_unified.csv",
|
sensor_data = "data/raw/{pid}/phone_calls_with_datetime.csv",
|
||||||
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
provider = lambda wildcards: config["PHONE_CALLS"]["PROVIDERS"][wildcards.provider_key.upper()],
|
provider = lambda wildcards: config["PHONE_CALLS"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
|
|
@ -93,6 +93,30 @@ PHONE_BLUETOOTH:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_CALLS:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
CALL_TYPE: call_type
|
||||||
|
CALL_DURATION: call_duration
|
||||||
|
TRACE: trace
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
CALL_TYPE: FLAG_TO_MUTATE
|
||||||
|
CALL_DURATION: call_duration
|
||||||
|
TRACE: trace
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
CALL_TYPE: call_type
|
||||||
|
SCRIPTS:
|
||||||
|
- "src/data/streams/mutations/phone/aware/calls_ios_unification.R"
|
||||||
|
|
||||||
PHONE_CONVERSATION:
|
PHONE_CONVERSATION:
|
||||||
ANDROID:
|
ANDROID:
|
||||||
RAPIDS_COLUMN_MAPPINGS:
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
source("renv/activate.R")
|
||||||
|
library("dplyr", warn.conflicts = F)
|
||||||
|
|
||||||
|
|
||||||
|
unify_ios_calls <- function(ios_calls){
  # Convert iOS call-status events into Android-style call records.
  #
  # Android's call types: 1=incoming, 2=outgoing, 3=missed
  # iOS' call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected
  # iOS status sequences are mapped to Android call types as follows:
  #   (1,2,4) or (2,1,4) -> 1 (incoming)
  #   (3,2,4) or (2,3,4) -> 2 (outgoing)
  #   (1,4)              -> 3 (missed), duration forced to 0
  #   (3,4)              -> 2 (outgoing that never connected), duration forced to 0
  #   anything else      -> discarded
  # Sometimes (due to a possible bug in Aware) sequences get logged on the exact same timestamp,
  # thus 3-item sequences can be 2,3,4 or 3,2,4
  # Even though iOS stores the duration of ringing/dialing for missed calls, we set it to 0 to match Android
  #
  # Args:
  #   ios_calls: data frame with the columns trace, timestamp, call_type and call_duration.
  #              When utc_date_time and local_date_time are present, the columns local_date,
  #              local_time, local_hour, local_minute, local_timezone and assigned_segments
  #              are read as well; otherwise device_id is read.
  #
  # Returns:
  #   A data frame with one row per trace whose status sequence was recognised, sorted by
  #   timestamp, where call_type and call_duration follow Android's convention.

  ios_calls <- ios_calls %>%
    arrange(trace, timestamp, call_type) %>%
    group_by(trace) %>%
    # search for the disconnect event, as it is common to outgoing, received and missed calls
    # NOTE(review): completed_call is not referenced by the summarise() calls below, which
    # collapse events per trace only - confirm whether grouping by it was intended
    mutate(completed_call = ifelse(call_type == 4, 2, 0),
           # assign the same ID to all events before a 4
           completed_call = cumsum(c(1, head(completed_call, -1) != tail(completed_call, -1))),
           # hack to match ID of last event (4) to that of the previous rows
           completed_call = ifelse(call_type == 4, completed_call - 1, completed_call))

  # We check utc_date_time and local_date_time exist because sometimes we call this function from
  # download_dataset to unify multi-platform participants. At that point such time columns are missing
  if("utc_date_time" %in% colnames(ios_calls) && "local_date_time" %in% colnames(ios_calls)){
    ios_calls <- ios_calls %>%
      summarise(call_type_sequence = paste(call_type, collapse = ","), # collapse all events of a trace
                # sanity check, timestamp_diff should be equal or close to duration sum
                # timestamp_diff = trunc((last(timestamp) - first(timestamp)) / 1000)
                # use call_duration = last(call_duration) if you want duration from pick up to hang up
                # use call_duration = sum(call_duration) if you want duration from dialing/ringing to hang up
                call_duration = last(call_duration),
                timestamp = first(timestamp),
                utc_date_time = first(utc_date_time),
                local_date_time = first(local_date_time),
                local_date = first(local_date),
                local_time = first(local_time),
                local_hour = first(local_hour),
                local_minute = first(local_minute),
                local_timezone = first(local_timezone),
                assigned_segments = first(assigned_segments))
  } else {
    ios_calls <- ios_calls %>%
      summarise(call_type_sequence = paste(call_type, collapse = ","),
                # keep the duration of the last status (pick up to hang up), exactly like the
                # branch above, so the result does not depend on which time columns are present
                call_duration = last(call_duration),
                timestamp = first(timestamp),
                device_id = first(device_id))
  }

  ios_calls <- ios_calls %>%
    mutate(call_type = case_when(
             call_type_sequence %in% c("1,2,4", "2,1,4") ~ 1, # incoming
             call_type_sequence == "1,4" ~ 3, # missed
             call_type_sequence %in% c("3,2,4", "2,3,4") ~ 2, # outgoing
             call_type_sequence == "3,4" ~ 4, # outgoing missed, we create this temp missed state to assign a duration of 0 below
             TRUE ~ -1), # other, call sequences without a disconnect (4) event are discarded
           # assign a duration of 0 to incoming and outgoing missed calls
           call_duration = ifelse(call_type == 3 | call_type == 4, 0, call_duration),
           # get rid of the temp missed call type, set to 2 to match Android. See https://github.com/carissalow/rapids/issues/79
           call_type = ifelse(call_type == 4, 2, call_type)) %>%
    # discard sequences without an event 4 (disconnect)
    filter(call_type > 0) %>%
    ungroup() %>%
    arrange(timestamp)

  ios_calls <- select(ios_calls, -call_type_sequence)

  return(ios_calls)
}
|
||||||
|
|
||||||
|
# Entry point of this mutation script: hands the call data to unify_ios_calls()
# and returns its result unchanged.
# NOTE(review): presumably invoked by the stream mutation machinery with the raw
# sensor data frame - confirm against the caller.
main <- function(data, stream_parameters){
  # stream_parameters is accepted but not used by this mutation
  unify_ios_calls(data)
}
|
|
@ -33,6 +33,13 @@ PHONE_BLUETOOTH:
|
||||||
- BT_NAME
|
- BT_NAME
|
||||||
- BT_RSSI
|
- BT_RSSI
|
||||||
|
|
||||||
|
PHONE_CALLS:
|
||||||
|
- TIMESTAMP
|
||||||
|
- DEVICE_ID
|
||||||
|
- CALL_TYPE
|
||||||
|
- CALL_DURATION
|
||||||
|
- TRACE
|
||||||
|
|
||||||
PHONE_CONVERSATION:
|
PHONE_CONVERSATION:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
|
|
Loading…
Reference in New Issue