Refactored testing after pipeline refactoring

pull/95/head
kaguillera 2020-06-23 18:29:01 -04:00
parent 36017d5dca
commit 7bcf674793
56 changed files with 75 additions and 57 deletions

View File

@@ -4,7 +4,7 @@ TABLES_FOR_SENSED_BINS: []
 # Participants to include in the analysis
 # You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
-PIDS: []
+PIDS: [test01]
 # Global var with common day segments
 DAY_SEGMENTS: &day_segments
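This hunk sets a default test participant, and the comment above it requires a participant file per ID in PIDS holding that participant's device_id. A minimal sketch of creating such a file (the data/external location is taken from the ValueError message in the Snakefile below; the device_id value is a placeholder, not a real AWARE id):

    from pathlib import Path

    # Sketch: create one participant file per PID under data/external/.
    def create_participant_files(pids_to_device_ids):
        Path("data/external").mkdir(parents=True, exist_ok=True)
        for pid, device_id in pids_to_device_ids.items():
            # e.g. data/external/test01 containing a single device_id line
            (Path("data/external") / pid).write_text(device_id + "\n")

    create_participant_files({"test01": "00000000-0000-0000-0000-000000000000"})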

View File

@@ -1,3 +1,5 @@
+library('tidyr')
+
 filter_by_day_segment <- function(data, day_segment) {
   if(day_segment %in% c("morning", "afternoon", "evening", "night"))
     data <- data %>% filter(local_day_segment == day_segment)
@@ -56,6 +58,6 @@ base_sms_features <- function(sms, sms_type, day_segment, requested_features){
       features <- merge(features, feature, by="local_date", all = TRUE)
     }
   }
-
+  features <- features %>% mutate_at(vars(contains("countmostfrequentcontact")), list( ~ replace_na(., 0)))
   return(features)
 }
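The line added to base_sms_features zero-fills only the countmostfrequentcontact columns after the merge, presumably because a missing count there means "no messages" rather than missing data, whereas NAs in other features should stay NA. For readers more familiar with pandas, a rough analogue of that tidyr/dplyr call (illustrative only, not part of the commit):

    import pandas as pd

    def zero_fill_most_frequent_contact(features: pd.DataFrame) -> pd.DataFrame:
        # Mirrors mutate_at(vars(contains("countmostfrequentcontact")), replace_na(., 0)):
        # fill NA with 0, but only in columns whose name contains the substring.
        cols = [c for c in features.columns if "countmostfrequentcontact" in c]
        features[cols] = features[cols].fillna(0)
        return features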

View File

@@ -6,23 +6,33 @@ include: "../rules/models.snakefile"
 include: "../rules/reports.snakefile"
 include: "../rules/mystudy.snakefile" # You can add snakefiles with rules tailored to your project
+files_to_compute = []
+
+if len(config["PIDS"]) == 0:
+    raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
+
+if config["MESSAGES"]["COMPUTE"]:
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/processed/{pid}/messages_{messages_type}_{day_segment}.csv", pid=config["PIDS"], messages_type = config["MESSAGES"]["TYPES"], day_segment = config["MESSAGES"]["DAY_SEGMENTS"]))
+
+if config["CALLS"]["COMPUTE"]:
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/processed/{pid}/calls_{call_type}_{segment}.csv", pid=config["PIDS"], call_type=config["CALLS"]["TYPES"], segment = config["CALLS"]["DAY_SEGMENTS"]))
+
+if config["SCREEN"]["COMPUTE"]:
+    if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
+        raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
+    files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]))
+    files_to_compute.extend(expand("data/processed/{pid}/screen_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SCREEN"]["DAY_SEGMENTS"]))
+
 rule all:
     input:
-        expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
-        expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
-        expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]),
         expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
-        expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
-            pid=config["PIDS"],
-            sms_type = config["SMS"]["TYPES"],
-            day_segment = config["SMS"]["DAY_SEGMENTS"]),
-        expand("data/processed/{pid}/call_{call_type}_{segment}.csv",
-            pid=config["PIDS"],
-            call_type=config["CALLS"]["TYPES"],
-            segment = config["CALLS"]["DAY_SEGMENTS"]),
-        expand("data/processed/{pid}/screen_{day_segment}.csv",
-            pid = config["PIDS"],
-            day_segment = config["SCREEN"]["DAY_SEGMENTS"]),
+        files_to_compute
 rule clean:
     shell:
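The refactored Snakefile builds files_to_compute from the sensors whose COMPUTE flag is set, instead of hard-coding every sensor's outputs in rule all. To see what one expand() call contributes, here is a self-contained sketch using snakemake's own helper and a config shaped like the testing config further down (values borrowed from it):

    from snakemake.io import expand

    # Minimal stand-in for the parsed config.yaml.
    config = {"PIDS": ["test01"],
              "MESSAGES": {"COMPUTE": True, "DB_TABLE": "messages"}}

    files_to_compute = []
    if config["MESSAGES"]["COMPUTE"]:
        # expand() substitutes every combination of the wildcard values.
        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv",
                                       pid=config["PIDS"],
                                       sensor=config["MESSAGES"]["DB_TABLE"]))

    print(files_to_compute)  # ['data/raw/test01/messages_raw.csv']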

View File

@@ -72,55 +72,54 @@ def generate_file_list(configs, sensor):
     return zip(act_file_list, exp_file_list)
-def generate_sensor_file_lists(configs):
-    # Go through the configs and select those sensors with DAY_SEGMENTS,
-    # optionally TYPES then create expected files. Return dictionary with
-    # list of file paths of expected and actual files for each sensor
-    # listed in the config file. Added for Travis.
+def generate_sensor_file_lists(config):
+    # Go through the configs and select those sensors with COMPUTE = True.
+    # Also get DAY_SEGMENTS, and optionally TYPES then create expected
+    # files. Return dictionary with list of file paths of expected and
+    # actual files for each sensor listed in the config file. Added for Travis.
     # Initialize string of file path for both expected and actual metric values
     act_str = "data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
    exp_str = "tests/data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
-    # Get all the SENSORS in the config.yaml files
-    sensors = configs['SENSORS']
+    # List of available sensors that can be tested by the testing suite
+    TESTABLE_SENSORS = ['MESSAGES', 'CALLS', 'SCREEN']
+
+    # Build list of sensors to be tested.
+    sensors = []
+    for sensor in TESTABLE_SENSORS:
+        if config[sensor]["COMPUTE"] == True:
+            sensors.append(sensor)
     sensor_file_lists = {}
     # Loop through all sensors and create the actual and expected file paths
     for sensor in sensors:
-        if sensor == 'messages':
-            sensor = 'sms'
-            sensor_cap = sensor.upper()
-        elif sensor == 'calls':
-            sensor_cap = sensor.upper()
-            sensor = 'call'
-        else:
-            sensor_cap = sensor.upper()
-        if 'DAY_SEGMENTS' in configs[sensor_cap]:
+        if 'DAY_SEGMENTS' in config[sensor]:
             sensor_type = []
-            if 'TYPES' in configs[sensor_cap]:
-                for each in configs[sensor_cap]['TYPES']:
+            if 'TYPES' in config[sensor]:
+                for each in config[sensor]['TYPES']:
                     sensor_type.append(each+'_')
             if sensor_type:
-                act_file_list = expand(act_str, pid=configs["PIDS"],
-                                       sensor = sensor,
+                act_file_list = expand(act_str, pid=config["PIDS"],
+                                       sensor = config[sensor]["DB_TABLE"],
                                        sensor_type = sensor_type,
-                                       day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
-                exp_file_list = expand(exp_str, pid=configs["PIDS"],
-                                       sensor = sensor,
+                                       day_segment = config[sensor]["DAY_SEGMENTS"])
+                exp_file_list = expand(exp_str, pid=config["PIDS"],
+                                       sensor = config[sensor]["DB_TABLE"],
                                        sensor_type = sensor_type,
-                                       day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
+                                       day_segment = config[sensor]["DAY_SEGMENTS"])
             else:
-                act_file_list = expand(act_str, pid=configs["PIDS"],
-                                       sensor = sensor,
+                act_file_list = expand(act_str, pid=config["PIDS"],
+                                       sensor = config[sensor]["DB_TABLE"],
                                        sensor_type = '',
-                                       day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
-                exp_file_list = expand(exp_str, pid=configs["PIDS"],
-                                       sensor = sensor,
+                                       day_segment = config[sensor]["DAY_SEGMENTS"])
+                exp_file_list = expand(exp_str, pid=config["PIDS"],
+                                       sensor = config[sensor]["DB_TABLE"],
                                        sensor_type = '',
-                                       day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
-        sensor_file_lists[sensor_cap] = list(zip(act_file_list,exp_file_list))
+                                       day_segment = config[sensor]["DAY_SEGMENTS"])
+        sensor_file_lists[sensor] = list(zip(act_file_list,exp_file_list))
     return sensor_file_lists
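After this change the returned dictionary is keyed by the uppercase config section names ('MESSAGES', 'CALLS', 'SCREEN') instead of the old sensor_cap values, and file paths are built from each sensor's DB_TABLE. Each value is a list of (actual, expected) path pairs, so a test can walk it as sketched below (assuming config is the parsed testing config and that pandas is used for the comparison; the real suite may compare differently):

    import pandas as pd
    from pandas.testing import assert_frame_equal

    # config: the parsed tests/settings/testing_config.yaml
    sensor_file_lists = generate_sensor_file_lists(config)
    for sensor, file_pairs in sensor_file_lists.items():
        for act_path, exp_path in file_pairs:
            # Compare the CSV the pipeline produced against the fixture
            # checked in under tests/data/processed/.
            assert_frame_equal(pd.read_csv(act_path), pd.read_csv(exp_path))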

View File

@@ -2,4 +2,4 @@ directory: ./
 configfile: ./tests/settings/testing_config.yaml
 snakefile: ./tests/Snakefile
 cores: 1
-forcerun: [sms_features, call_features, screen_features]
+forcerun: [messages_features, call_features, screen_features]
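These keys mirror Snakemake's command-line options of the same names; the forcerun entry changes because the feature rule was renamed from sms_features to messages_features in this refactor. Running the suite with these settings should be roughly equivalent to the following invocation (reconstructed from the keys above, not quoted from the repo):

    snakemake --directory ./ \
              --snakefile ./tests/Snakefile \
              --configfile ./tests/settings/testing_config.yaml \
              --cores 1 \
              --forcerun messages_features call_features screen_features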

View File

@@ -1,18 +1,20 @@
 # Valid database table name
 SENSORS: [messages, calls, screen]
 #SENSORS: [calls]
 # Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS.
 # If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
 TABLES_FOR_SENSED_BINS: [messages, calls, screen]
-# Test Participant data to include in the unit testing
-# You must create a file for each participant
-# named pXXX containing their device_id
+# Participants to include in the analysis
+# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
 PIDS: [test01, test02]
 # Global var with common day segments
 DAY_SEGMENTS: &day_segments
   [daily, morning, afternoon, evening, night]
 # Communication SMS features config, TYPES and FEATURES keys need to match
-SMS:
+MESSAGES:
+  COMPUTE: True
+  DB_TABLE: messages
   TYPES : [received, sent]
   FEATURES:
     received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
@@ -21,6 +23,8 @@ SMS:
 # Communication call features config, TYPES and FEATURES keys need to match
 CALLS:
+  COMPUTE: True
+  DB_TABLE: calls
   TYPES: [missed, incoming, outgoing]
   FEATURES:
     missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact]
@@ -28,7 +32,10 @@ CALLS:
     outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
   DAY_SEGMENTS: *day_segments
 SCREEN:
+  COMPUTE: True
+  DB_TABLE: screen
+  DAY_SEGMENTS: *day_segments
   REFERENCE_HOUR_FIRST_USE: 0
   FEATURES_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"]
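Each testable sensor section now carries the two keys the refactored Snakefile and test utilities read: COMPUTE, which gates the sensor's rules, and DB_TABLE, which names the database table behind the sensor (and, via generate_sensor_file_lists, the file names to check). A quick way to list which sensors a given config enables (illustrative; assumes PyYAML is installed):

    import yaml

    with open("tests/settings/testing_config.yaml") as f:
        config = yaml.safe_load(f)

    # YAML's "True" loads as a Python boolean, so COMPUTE can be tested directly.
    enabled = [s for s in ("MESSAGES", "CALLS", "SCREEN") if config[s]["COMPUTE"]]
    print(enabled)  # ['MESSAGES', 'CALLS', 'SCREEN'] for the values above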