Refactored testing after pipeline refactoring

parent 36017d5dca
commit 7bcf674793
@@ -4,7 +4,7 @@ TABLES_FOR_SENSED_BINS: []
 # Participants to include in the analysis
 # You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
-PIDS: []
+PIDS: [test01]
 
 # Global var with common day segments
 DAY_SEGMENTS: &day_segments
 
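For context, the pXXX participant files referenced in the comments above live in data/external (see the ValueError added in the Snakefile further down). A minimal sketch of data/external/test01, assuming the first line simply holds the participant's device_id — the exact file format is not shown in this commit:

    a748ee1a-1234-4abc-8def-1234567890ab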
@@ -1,3 +1,5 @@
+library('tidyr')
+
 filter_by_day_segment <- function(data, day_segment) {
   if(day_segment %in% c("morning", "afternoon", "evening", "night"))
     data <- data %>% filter(local_day_segment == day_segment)
@@ -56,6 +58,6 @@ base_sms_features <- function(sms, sms_type, day_segment, requested_features){
       features <- merge(features, feature, by="local_date", all = TRUE)
     }
   }
-
+  features <- features %>% mutate_at(vars(contains("countmostfrequentcontact")), list( ~ replace_na(., 0)))
   return(features)
 }
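Side note: the two changes in this R file work as a pair. replace_na() comes from tidyr, which is why library('tidyr') is now loaded at the top; the mutate_at() call then zero-fills the NA values that the all = TRUE merge introduces in the countmostfrequentcontact columns.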
@@ -6,23 +6,33 @@ include: "../rules/models.snakefile"
 include: "../rules/reports.snakefile"
 include: "../rules/mystudy.snakefile" # You can add snakfiles with rules tailored to your project
 
+files_to_compute = []
+
+if len(config["PIDS"]) == 0:
+    raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
+
+if config["MESSAGES"]["COMPUTE"]:
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/processed/{pid}/messages_{messages_type}_{day_segment}.csv", pid=config["PIDS"], messages_type = config["MESSAGES"]["TYPES"], day_segment = config["MESSAGES"]["DAY_SEGMENTS"]))
+
+if config["CALLS"]["COMPUTE"]:
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/processed/{pid}/calls_{call_type}_{segment}.csv", pid=config["PIDS"], call_type=config["CALLS"]["TYPES"], segment = config["CALLS"]["DAY_SEGMENTS"]))
+
+if config["SCREEN"]["COMPUTE"]:
+    if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
+        raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]))
+    files_to_compute.extend(expand("data/processed/{pid}/screen_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SCREEN"]["DAY_SEGMENTS"]))
+
 rule all:
     input:
-        expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
-        expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
-        expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]),
-        expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
-        expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
-               pid=config["PIDS"],
-               sms_type = config["SMS"]["TYPES"],
-               day_segment = config["SMS"]["DAY_SEGMENTS"]),
-        expand("data/processed/{pid}/call_{call_type}_{segment}.csv",
-               pid=config["PIDS"],
-               call_type=config["CALLS"]["TYPES"],
-               segment = config["CALLS"]["DAY_SEGMENTS"]),
-        expand("data/processed/{pid}/screen_{day_segment}.csv",
-               pid = config["PIDS"],
-               day_segment = config["SCREEN"]["DAY_SEGMENTS"]),
+        files_to_compute
 
 rule clean:
     shell:
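A note on the expand() calls above: Snakemake's expand() fills each wildcard with every combination of the supplied values, so files_to_compute grows by one path per participant × table. A minimal sketch (participant and table names are illustrative, not from the repo):

    from snakemake.io import expand

    # Illustrative values only; real values come from config.yaml
    paths = expand("data/raw/{pid}/{sensor}_raw.csv",
                   pid=["test01", "test02"], sensor=["messages"])
    print(paths)
    # ['data/raw/test01/messages_raw.csv', 'data/raw/test02/messages_raw.csv']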
@@ -72,55 +72,54 @@ def generate_file_list(configs, sensor):
     return zip(act_file_list, exp_file_list)
 
 
-def generate_sensor_file_lists(configs):
-    # Go through the configs and select those sensors with DAY_SEGMENTS,
-    # optionally TYPES then create expected files Return dictionary with
-    # list of file paths of expected and actual files for each sensor
-    # listed in the config file. Added for Travis.
+def generate_sensor_file_lists(config):
+    # Go through the configs and select those sensors with COMPUTE = True.
+    # Also get DAY_SEGMENTS, and optionally TYPES then create expected
+    # files. Return dictionary with list of file paths of expected and
+    # actual files for each sensor listed in the config file. Added for Travis.
 
     # Initialize string of file path for both expected and actual metric values
     act_str = "data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
     exp_str = "tests/data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
 
-    # Get all the SENSORS in the config.yaml files
-    sensors = configs['SENSORS']
+    # List of available sensors that can be tested by the testing suite
+    TESTABLE_SENSORS = ['MESSAGES', 'CALLS', 'SCREEN']
+
+    # Build list of sensors to be tested.
+    sensors = []
+    for sensor in TESTABLE_SENSORS:
+        if config[sensor]["COMPUTE"] == True:
+            sensors.append(sensor)
 
     sensor_file_lists = {}
 
     # Loop though all sensors and create the actual and expected file paths
     for sensor in sensors:
-        if sensor == 'messages':
-            sensor = 'sms'
-            sensor_cap = sensor.upper()
-        elif sensor == 'calls':
-            sensor_cap = sensor.upper()
-            sensor = 'call'
-        else:
-            sensor_cap = sensor.upper()
-        if 'DAY_SEGMENTS' in configs[sensor_cap]:
+        if 'DAY_SEGMENTS' in config[sensor]:
             sensor_type = []
-            if 'TYPES' in configs[sensor_cap]:
-                for each in configs[sensor_cap]['TYPES']:
+            if 'TYPES' in config[sensor]:
+                for each in config[sensor]['TYPES']:
                     sensor_type.append(each+'_')
 
             if sensor_type:
-                act_file_list = expand(act_str, pid=configs["PIDS"],
-                                       sensor = sensor,
+                act_file_list = expand(act_str, pid=config["PIDS"],
+                                       sensor = config[sensor]["DB_TABLE"],
                                        sensor_type = sensor_type,
-                                       day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
-                exp_file_list = expand(exp_str, pid=configs["PIDS"],
-                                       sensor = sensor,
+                                       day_segment = config[sensor]["DAY_SEGMENTS"])
+                exp_file_list = expand(exp_str, pid=config["PIDS"],
+                                       sensor = config[sensor]["DB_TABLE"],
                                        sensor_type = sensor_type,
-                                       day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
+                                       day_segment = config[sensor]["DAY_SEGMENTS"])
             else:
-                act_file_list = expand(act_str, pid=configs["PIDS"],
-                                       sensor = sensor,
+                act_file_list = expand(act_str, pid=config["PIDS"],
+                                       sensor = config[sensor]["DB_TABLE"],
                                        sensor_type = '',
-                                       day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
-                exp_file_list = expand(exp_str, pid=configs["PIDS"],
-                                       sensor = sensor,
+                                       day_segment = config[sensor]["DAY_SEGMENTS"])
+                exp_file_list = expand(exp_str, pid=config["PIDS"],
+                                       sensor = config[sensor]["DB_TABLE"],
                                        sensor_type = '',
-                                       day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
+                                       day_segment = config[sensor]["DAY_SEGMENTS"])
 
-        sensor_file_lists[sensor_cap] = list(zip(act_file_list,exp_file_list))
+        sensor_file_lists[sensor] = list(zip(act_file_list,exp_file_list))
 
     return sensor_file_lists
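For reference, the refactored generate_sensor_file_lists() pairs each actual output with its expected counterpart under tests/data. A sketch of the return shape, derived from act_str/exp_str and the testing config below rather than taken from the repo:

    # Hypothetical output for PIDS [test01] with MESSAGES computed,
    # DB_TABLE "messages", TYPES [received, sent], day segment "daily":
    {
        'MESSAGES': [
            ('data/processed/test01/messages_received_daily.csv',
             'tests/data/processed/test01/messages_received_daily.csv'),
            ('data/processed/test01/messages_sent_daily.csv',
             'tests/data/processed/test01/messages_sent_daily.csv'),
        ]
    }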
@@ -2,4 +2,4 @@ directory: ./
 configfile: ./tests/settings/testing_config.yaml
 snakefile: ./tests/Snakefile
 cores: 1
-forcerun: [sms_features, call_features, screen_features]
+forcerun: [messages_features, call_features, screen_features]
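This file reads like a Snakemake profile: each key mirrors a CLI flag (--directory, --configfile, --snakefile, --cores, --forcerun). Assuming it lives in tests/settings/, the test suite could be driven with something like the following; this invocation is an inference from the keys, not shown in the commit:

    snakemake --profile ./tests/settings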
@@ -1,18 +1,20 @@
-# Valid database table name
-SENSORS: [messages, calls, screen]
-#SENSORS: [calls]
+# Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS.
+# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
+TABLES_FOR_SENSED_BINS: [messages, calls, screen]
 
-# Test Participant data to include in the unit testing
-# You must create a file for each participant
-# named pXXX containing their device_id
+# Participants to include in the analysis
+# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
 PIDS: [test01, test02]
 
+
 # Global var with common day segments
 DAY_SEGMENTS: &day_segments
   [daily, morning, afternoon, evening, night]
 
 # Communication SMS features config, TYPES and FEATURES keys need to match
-SMS:
+MESSAGES:
+  COMPUTE: True
+  DB_TABLE: messages
   TYPES : [received, sent]
   FEATURES:
     received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
@@ -21,6 +23,8 @@ SMS:
 
 # Communication call features config, TYPES and FEATURES keys need to match
 CALLS:
+  COMPUTE: True
+  DB_TABLE: calls
   TYPES: [missed, incoming, outgoing]
   FEATURES:
     missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact]
@@ -28,7 +32,10 @@ CALLS:
     outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
   DAY_SEGMENTS: *day_segments
 
+
 SCREEN:
+  COMPUTE: True
+  DB_TABLE: screen
   DAY_SEGMENTS: *day_segments
   REFERENCE_HOUR_FIRST_USE: 0
   FEATURES_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"]
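One note on the &day_segments / *day_segments pair used throughout: it is standard YAML anchor/alias syntax, so every sensor's DAY_SEGMENTS simply reuses the one global list. A minimal standalone example:

    DAY_SEGMENTS: &day_segments     # anchor: define the list once
      [daily, morning]

    CALLS:
      DAY_SEGMENTS: *day_segments   # alias: resolves to [daily, morning]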