From 08ce25083904c3114f35551d512ecb89c49f1a06 Mon Sep 17 00:00:00 2001
From: JulioV
Date: Tue, 11 Aug 2020 16:18:06 -0400
Subject: [PATCH] Unify conversation timestamp for ios

Convert iOS conversation start/end timestamps that are expressed in
seconds to milliseconds as part of the unification step. The conversion
is applied value by value, so rows that are already in milliseconds and
missing values are left untouched.

Extract infer_participant_platform() from the optional_*_input functions
so the Snakefile can request the raw, with_datetime and unified files of
the activity recognition and conversation tables that match each
participant's platform. The error raised for an unsupported platform now
reports the inferred platform; the previous message referenced a
variable that is no longer defined in those functions.

---
 Snakefile                     | 15 ++++++++++++---
 rules/features.snakefile      | 25 ++++++++++++-------------
 rules/preprocessing.snakefile |  4 ++--
 src/data/unify_utils.R        | 19 +++++++++++++++++++
 4 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/Snakefile b/Snakefile
index fde7af4b..713e3a14 100644
--- a/Snakefile
+++ b/Snakefile
@@ -7,6 +7,9 @@ include: "rules/reports.snakefile"
 
 import itertools
 
+pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
+pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
+
 files_to_compute = []
 
 if len(config["PIDS"]) == 0:
@@ -38,7 +41,6 @@ if config["CALLS"]["COMPUTE"]:
     files_to_compute.extend(expand("data/processed/{pid}/calls_{call_type}_{day_segment}.csv", pid=config["PIDS"], call_type=config["CALLS"]["TYPES"], day_segment = config["CALLS"]["DAY_SEGMENTS"]))
 
 if config["BARNETT_LOCATION"]["COMPUTE"]:
-    # TODO add files_to_compute.extend(optional_location_input(None))
     if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
         if config["BARNETT_LOCATION"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["TABLES"]:
             files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
@@ -55,7 +57,11 @@ if config["BLUETOOTH"]["COMPUTE"]:
     files_to_compute.extend(expand("data/processed/{pid}/bluetooth_{day_segment}.csv", pid=config["PIDS"], day_segment = config["BLUETOOTH"]["DAY_SEGMENTS"]))
 
 if config["ACTIVITY_RECOGNITION"]["COMPUTE"]:
-    # TODO add files_to_compute.extend(optional_ar_input(None)), the Android or iOS table gets processed depending on each participant
+    for pids,table in zip([pids_android, pids_ios], [config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]):
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
+        files_to_compute.extend(expand("data/processed/{pid}/{sensor}_deltas.csv", pid=pids, sensor=table))
     files_to_compute.extend(expand("data/processed/{pid}/activity_recognition_{day_segment}.csv",pid=config["PIDS"], day_segment = config["ACTIVITY_RECOGNITION"]["DAY_SEGMENTS"]))
 
 if config["BATTERY"]["COMPUTE"]:
@@ -121,7 +127,10 @@ if config["SLEEP"]["COMPUTE"]:
     files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))
 
 if config["CONVERSATION"]["COMPUTE"]:
-    # TODO add files_to_compute.extend(optional_conversation_input(None)), the Android or iOS table gets processed depending on each participant
+    for pids,table in zip([pids_android, pids_ios], [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"]]):
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
     files_to_compute.extend(expand("data/processed/{pid}/conversation_{day_segment}.csv",pid=config["PIDS"], day_segment = config["CONVERSATION"]["DAY_SEGMENTS"]))
 
 if config["DORYAB_LOCATION"]["COMPUTE"]:
diff --git a/rules/features.snakefile b/rules/features.snakefile
index bea20a37..e4e6e78b 100644
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@@ -1,11 +1,16 @@
-def optional_ar_input(wildcards):
-    with open("data/external/"+wildcards.pid, encoding="ISO-8859-1") as external_file:
+def infer_participant_platform(participant_file):
+    """Return the platform on line 2 of participant_file; 'multiple' or a list with both 'android' and 'ios' resolves to 'android'."""
+    with open(participant_file, encoding="ISO-8859-1") as external_file:
         external_file_content = external_file.readlines()
     platforms = external_file_content[1].strip().split(",")
     if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms):
         platform = "android"
     else:
         platform = platforms[0]
+    return platform
+
+def optional_ar_input(wildcards):
+    platform = infer_participant_platform("data/external/"+wildcards.pid)
 
     if platform == "android":
         return ["data/raw/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv",
@@ -17,18 +22,12 @@ def optional_ar_input(wildcards):
-        raise ValueError("Platform (line 2) in a participant file should be 'android', 'ios', or 'multiple'. You typed '" + platforms + "'")
+        raise ValueError("Platform (line 2) in a participant file should be 'android', 'ios', or 'multiple'. You typed '" + platform + "'")
 
 def optional_conversation_input(wildcards):
-    with open("data/external/"+wildcards.pid, encoding="ISO-8859-1") as external_file:
-        external_file_content = external_file.readlines()
-    platforms = external_file_content[1].strip().split(",")
-    if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms):
-        platform = "android"
-    else:
-        platform = platforms[0]
+    platform = infer_participant_platform("data/external/"+wildcards.pid)
 
     if platform == "android":
-        return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "_with_datetime.csv"]
+        return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv"]
     elif platform == "ios":
-        return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["IOS"] + "_with_datetime.csv"]
+        return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["IOS"] + "_with_datetime_unified.csv"]
     else:
-        raise ValueError("Platform (line 2) in a participant file should be 'android' or 'ios', or 'multiple'. You typed '" + platforms + "'")
+        raise ValueError("Platform (line 2) in a participant file should be 'android' or 'ios', or 'multiple'. You typed '" + platform + "'")
 
diff --git a/rules/preprocessing.snakefile b/rules/preprocessing.snakefile
index 8993afae..edf30abb 100644
--- a/rules/preprocessing.snakefile
+++ b/rules/preprocessing.snakefile
@@ -32,7 +32,7 @@ rule download_dataset:
         table = "{sensor}",
         timezone = config["TIMEZONE"],
         aware_multiplatform_tables = config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "," + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"] + "," + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "," + config["CONVERSATION"]["DB_TABLE"]["IOS"],
-        unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]}
+        unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"], "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"]}
     output:
         "data/raw/{pid}/{sensor}_raw.csv"
     script:
@@ -89,7 +89,7 @@ rule unify_ios_android:
         participant_info = "data/external/{pid}"
     params:
         sensor = "{sensor}",
-        unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]}
+        unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"], "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"]}
     output:
         "data/raw/{pid}/{sensor}_with_datetime_unified.csv"
     script:
diff --git a/src/data/unify_utils.R b/src/data/unify_utils.R
index 0a9539c4..f373c55f 100644
--- a/src/data/unify_utils.R
+++ b/src/data/unify_utils.R
@@ -122,6 +122,23 @@ unify_ios_gar <- function(ios_gar){
     return(ios_gar)
 }
 
+unify_ios_conversation <- function(conversation){
+    # Make sure double_convo_start and double_convo_end are expressed in milliseconds
+    max_epoch_in_seconds <- 9999999999 # Values at or below 9999999999 are in seconds instead of milliseconds
+    seconds_to_milliseconds <- function(timestamps){
+        # Convert value by value so rows already in milliseconds are not multiplied again and NAs are skipped
+        in_seconds <- !is.na(timestamps) & timestamps <= max_epoch_in_seconds
+        timestamps[in_seconds] <- timestamps[in_seconds] * 1000
+        return(timestamps)
+    }
+
+    if(nrow(conversation) > 0){
+        conversation$double_convo_start <- seconds_to_milliseconds(conversation$double_convo_start)
+        conversation$double_convo_end <- seconds_to_milliseconds(conversation$double_convo_end)
+    }
+    return(conversation)
+}
+
 # This function is used in download_dataset.R
 unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, unifiable_tables, device_ids, platforms){
     # If platforms is 'multiple', fetch each device_id's platform from aware_device, otherwise, use those given by the user
@@ -185,6 +202,8 @@ unify_data <- function(sensor_data, sensor, platform, unifiable_sensors){
             sensor_data = unify_ios_screen(sensor_data)
         }
         # android screen remains unchanged
+    } else if(sensor == unifiable_sensors$ios_conversation){
+        sensor_data = unify_ios_conversation(sensor_data)
     }
     return(sensor_data)
 }
\ No newline at end of file