diff --git a/Snakefile b/Snakefile index f89fa7c9..7b64a768 100644 --- a/Snakefile +++ b/Snakefile @@ -42,8 +42,8 @@ rule all: expand("data/processed/{pid}/bluetooth_{segment}.csv", pid=config["PIDS"], segment = config["BLUETOOTH"]["DAY_SEGMENTS"]), - expand("data/processed/{pid}/google_activity_recognition_{segment}.csv",pid=config["PIDS"], - segment = config["GOOGLE_ACTIVITY_RECOGNITION"]["DAY_SEGMENTS"]), + expand("data/processed/{pid}/activity_recognition_{segment}.csv",pid=config["PIDS"], + segment = config["ACTIVITY_RECOGNITION"]["DAY_SEGMENTS"]), expand("data/processed/{pid}/battery_{day_segment}.csv", pid = config["PIDS"], day_segment = config["BATTERY"]["DAY_SEGMENTS"]), diff --git a/config.yaml b/config.yaml index cede7342..655bc06d 100644 --- a/config.yaml +++ b/config.yaml @@ -1,5 +1,5 @@ # Valid database table names -SENSORS: [applications_crashes, applications_foreground, applications_notifications, battery, bluetooth, calls, locations, messages, plugin_ambient_noise, plugin_device_usage, plugin_google_activity_recognition, screen] +SENSORS: [applications_crashes, applications_foreground, applications_notifications, battery, bluetooth, calls, locations, messages, plugin_ambient_noise, plugin_device_usage, plugin_google_activity_recognition, plugin_ios_activity_recognition, screen] FITBIT_TABLE: [fitbit_data] FITBIT_SENSORS: [heartrate, steps, sleep, calories] @@ -78,7 +78,7 @@ BLUETOOTH: DAY_SEGMENTS: *day_segments FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] -GOOGLE_ACTIVITY_RECOGNITION: +ACTIVITY_RECOGNITION: DAY_SEGMENTS: *day_segments FEATURES: ['count','mostcommonactivity','countuniqueactivities','activitychangecount','sumstationary','summobile','sumvehicle'] @@ -132,7 +132,7 @@ PARAMS_FOR_ANALYSIS: GROUNDTRUTH_TABLE: participant_info SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"] DAY_SEGMENTS: *day_segments - PHONE_FEATURES: [accelerometer, applications_foreground, battery, 
call_incoming, call_missed, call_outgoing, google_activity_recognition, light, location_barnett, screen, sms_received, sms_sent] + PHONE_FEATURES: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, activity_recognition, light, location_barnett, screen, sms_received, sms_sent] FITBIT_FEATURES: [fitbit_heartrate, fitbit_step] PHONE_FITBIT_FEATURES: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile DEMOGRAPHIC_FEATURES: [age, gender, inpatientdays] diff --git a/rules/features.snakefile b/rules/features.snakefile index ce2f99d1..6bc5b694 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -1,3 +1,14 @@
+def optional_ar_input(wildcards):
+    """Return the events and deltas inputs of activity_features for the participant's platform."""
+    # the platform is read from the second line of data/external/<pid>
+    with open("data/external/"+wildcards.pid, encoding="ISO-8859-1") as participant_file:
+        platform = participant_file.readlines()[1].strip()
+    if platform != "android":
+        return ["data/raw/{pid}/plugin_ios_activity_recognition_with_datetime_unified.csv",
+                "data/processed/{pid}/plugin_ios_activity_recognition_deltas.csv"]
+    return ["data/raw/{pid}/plugin_google_activity_recognition_with_datetime_unified.csv",
+            "data/processed/{pid}/plugin_google_activity_recognition_deltas.csv"]
+
rule sms_features: input: "data/raw/{pid}/messages_with_datetime.csv" @@ -41,11 +52,19 @@ rule screen_deltas: rule google_activity_recognition_deltas: input: - "data/raw/{pid}/plugin_google_activity_recognition_with_datetime.csv" + "data/raw/{pid}/plugin_google_activity_recognition_with_datetime_unified.csv" output: "data/processed/{pid}/plugin_google_activity_recognition_deltas.csv" script: - "../src/features/google_activity_recognition_deltas.R" + "../src/features/activity_recognition_deltas.R" + +rule ios_activity_recognition_deltas: + input: + "data/raw/{pid}/plugin_ios_activity_recognition_with_datetime_unified.csv" + output: + 
"data/processed/{pid}/plugin_ios_activity_recognition_deltas.csv" + script: + "../src/features/activity_recognition_deltas.R" rule location_barnett_features: input: @@ -72,18 +91,17 @@ rule bluetooth_features: "data/processed/{pid}/bluetooth_{day_segment}.csv" script: "../src/features/bluetooth_features.R" - + rule activity_features: input: - gar_events = "data/raw/{pid}/plugin_google_activity_recognition_with_datetime.csv", - gar_deltas = "data/processed/{pid}/plugin_google_activity_recognition_deltas.csv" + optional_ar_input params: segment = "{day_segment}", - features = config["GOOGLE_ACTIVITY_RECOGNITION"]["FEATURES"] + features = config["ACTIVITY_RECOGNITION"]["FEATURES"] output: - "data/processed/{pid}/google_activity_recognition_{day_segment}.csv" + "data/processed/{pid}/activity_recognition_{day_segment}.csv" script: - "../src/features/google_activity_recognition.py" + "../src/features/activity_recognition.py" rule battery_features: input: diff --git a/src/data/unify_ios_android.R b/src/data/unify_ios_android.R index 6581074a..03639f56 100644 --- a/src/data/unify_ios_android.R +++ b/src/data/unify_ios_android.R @@ -1,6 +1,7 @@ source("packrat/init.R") library(dplyr) +library(stringr) unify_ios_battery <- function(ios_battery){ # We only need to unify battery data for iOS client V1. 
V2 does it out-of-the-box @@ -64,6 +65,50 @@ unify_ios_calls <- function(ios_calls){ return(ios_calls) }
+# Normalise the raw iOS "activities" strings: strip JSON quotes/brackets and
+# resolve known multi-activity combinations; stop() on unknown combinations.
+clean_ios_activity_column <- function(ios_gar){
+    ios_gar <- ios_gar %>%
+        mutate(activities = str_replace_all(activities, pattern = '("|\\[|\\])', replacement = ""))
+
+    # after cleaning, a remaining comma means several activities were reported at once
+    has_multiple <- !is.na(ios_gar$activities) & str_detect(ios_gar$activities, ",")
+    existent_multiple_activities <- unique(ios_gar$activities[has_multiple])
+
+    known_multiple_activities <- c("stationary,automotive")
+    unkown_multiple_actvities <- setdiff(existent_multiple_activities, known_multiple_activities)
+    if(length(unkown_multiple_actvities) > 0){
+        # collapse to one string, otherwise stop() repeats the message once per combination
+        stop(paste0("There are unkwown combinations of ios activities, you need to implement the decision of the ones to keep: ",
+                    paste(unkown_multiple_actvities, collapse = "; ")))
+    }
+
+    # the known combination "stationary,automotive" is resolved to "automotive"
+    ios_gar %>%
+        mutate(activities = str_replace_all(activities, pattern = "stationary,automotive", replacement = "automotive"))
+}
+
+# Make iOS activity recognition data compatible with the Android version by adding
+# "activity_name" and "activity_type"; rows with a blank or NA "activities" are dropped.
+unify_ios_gar <- function(ios_gar){
+    # logical mask, not -which(): an empty negative index drops every row when nothing is blank
+    ios_gar <- ios_gar[!is.na(ios_gar$activities) & ios_gar$activities != "", ]
+    ios_gar <- clean_ios_activity_column(ios_gar)
+
+    # "unknown" gets a name as well so it is not left as NA next to activity_type 4
+    ios_gar %>%
+        mutate(activity_name = case_when(activities == "automotive" ~ "in_vehicle",
+                                         activities == "cycling" ~ "on_bicycle",
+                                         activities == "walking" | activities == "running" ~ "on_foot",
+                                         activities == "stationary" ~ "still",
+                                         activities == "unknown" ~ "unknown"),
+               activity_type = case_when(activities == "automotive" ~ 0,
+                                         activities == "cycling" ~ 1,
+                                         activities == "walking" | activities == "running" ~ 2,
+                                         activities == "stationary" ~ 3,
+                                         activities == "unknown" ~ 4))
+}
+
sensor_data <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE) participant_info <- 
snakemake@input[["participant_info"]] @@ -80,5 +125,7 @@ if(sensor == "calls"){ sensor_data = unify_ios_battery(sensor_data) } # android battery remains unchanged +} else if(sensor == "plugin_ios_activity_recognition"){ + sensor_data = unify_ios_gar(sensor_data) } write.csv(sensor_data, snakemake@output[[1]], row.names = FALSE) diff --git a/src/features/google_activity_recognition.py b/src/features/activity_recognition.py similarity index 91% rename from src/features/google_activity_recognition.py rename to src/features/activity_recognition.py index 159dd979..3f3ac5a3 100644 --- a/src/features/google_activity_recognition.py +++ b/src/features/activity_recognition.py @@ -7,8 +7,8 @@ day_segment = snakemake.params["segment"] features = snakemake.params["features"] #Read csv into a pandas dataframe -data = pd.read_csv(snakemake.input['gar_events'],parse_dates=['local_date_time']) -ar_deltas = pd.read_csv(snakemake.input['gar_deltas'],parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"]) +data = pd.read_csv(snakemake.input[0],parse_dates=["local_date_time"]) +ar_deltas = pd.read_csv(snakemake.input[1],parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"]) columns = list("ar_" + str(day_segment) + "_" + column for column in features) if data.empty: @@ -60,4 +60,4 @@ else: .agg({"ar_" + str(day_segment) + "_" + str(column) :'sum'})) finalDataset.index.names = ['local_date'] -finalDataset.to_csv(snakemake.output[0]) \ No newline at end of file +finalDataset.to_csv(snakemake.output[0]) diff --git a/src/features/google_activity_recognition_deltas.R b/src/features/activity_recognition_deltas.R similarity index 100% rename from src/features/google_activity_recognition_deltas.R rename to src/features/activity_recognition_deltas.R