Refactored testing after pipeline refactoring
parent
36017d5dca
commit
7bcf674793
|
@ -4,7 +4,7 @@ TABLES_FOR_SENSED_BINS: []
|
|||
|
||||
# Participants to include in the analysis
|
||||
# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
|
||||
PIDS: []
|
||||
PIDS: [test01]
|
||||
|
||||
# Global var with common day segments
|
||||
DAY_SEGMENTS: &day_segments
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
library('tidyr')
|
||||
|
||||
filter_by_day_segment <- function(data, day_segment) {
|
||||
if(day_segment %in% c("morning", "afternoon", "evening", "night"))
|
||||
data <- data %>% filter(local_day_segment == day_segment)
|
||||
|
@ -56,6 +58,6 @@ base_sms_features <- function(sms, sms_type, day_segment, requested_features){
|
|||
features <- merge(features, feature, by="local_date", all = TRUE)
|
||||
}
|
||||
}
|
||||
|
||||
features <- features %>% mutate_at(vars(contains("countmostfrequentcontact")), list( ~ replace_na(., 0)))
|
||||
return(features)
|
||||
}
|
|
@ -6,23 +6,33 @@ include: "../rules/models.snakefile"
|
|||
include: "../rules/reports.snakefile"
|
||||
include: "../rules/mystudy.snakefile" # You can add snakefiles with rules tailored to your project
|
||||
|
||||
files_to_compute = []
|
||||
|
||||
if len(config["PIDS"]) == 0:
|
||||
raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
|
||||
|
||||
if config["MESSAGES"]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/messages_{messages_type}_{day_segment}.csv", pid=config["PIDS"], messages_type = config["MESSAGES"]["TYPES"], day_segment = config["MESSAGES"]["DAY_SEGMENTS"]))
|
||||
|
||||
if config["CALLS"]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/calls_{call_type}_{segment}.csv", pid=config["PIDS"], call_type=config["CALLS"]["TYPES"], segment = config["CALLS"]["DAY_SEGMENTS"]))
|
||||
|
||||
if config["SCREEN"]["COMPUTE"]:
|
||||
if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
|
||||
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/screen_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SCREEN"]["DAY_SEGMENTS"]))
|
||||
|
||||
rule all:
|
||||
input:
|
||||
expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||
expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||
expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]),
|
||||
expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
|
||||
expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
|
||||
pid=config["PIDS"],
|
||||
sms_type = config["SMS"]["TYPES"],
|
||||
day_segment = config["SMS"]["DAY_SEGMENTS"]),
|
||||
expand("data/processed/{pid}/call_{call_type}_{segment}.csv",
|
||||
pid=config["PIDS"],
|
||||
call_type=config["CALLS"]["TYPES"],
|
||||
segment = config["CALLS"]["DAY_SEGMENTS"]),
|
||||
expand("data/processed/{pid}/screen_{day_segment}.csv",
|
||||
pid = config["PIDS"],
|
||||
day_segment = config["SCREEN"]["DAY_SEGMENTS"]),
|
||||
files_to_compute
|
||||
|
||||
rule clean:
|
||||
shell:
|
||||
|
|
|
@ -72,55 +72,54 @@ def generate_file_list(configs, sensor):
|
|||
return zip(act_file_list, exp_file_list)
|
||||
|
||||
|
||||
def generate_sensor_file_lists(configs):
|
||||
# Go through the configs and select those sensors with DAY_SEGMENTS,
|
||||
# optionally TYPES then create expected files Return dictionary with
|
||||
# list of file paths of expected and actual files for each sensor
|
||||
# listed in the config file. Added for Travis.
|
||||
def generate_sensor_file_lists(config):
|
||||
# Go through the configs and select those sensors with COMPUTE = True.
|
||||
# Also get DAY_SEGMENTS, and optionally TYPES then create expected
|
||||
# files. Return dictionary with list of file paths of expected and
|
||||
# actual files for each sensor listed in the config file. Added for Travis.
|
||||
|
||||
# Initialize string of file path for both expected and actual metric values
|
||||
act_str = "data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
|
||||
exp_str = "tests/data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
|
||||
|
||||
# Get all the SENSORS in the config.yaml files
|
||||
sensors = configs['SENSORS']
|
||||
# List of available sensors that can be tested by the testing suite
|
||||
TESTABLE_SENSORS = ['MESSAGES', 'CALLS', 'SCREEN']
|
||||
|
||||
# Build list of sensors to be tested.
|
||||
sensors = []
|
||||
for sensor in TESTABLE_SENSORS:
|
||||
if config[sensor]["COMPUTE"] == True:
|
||||
sensors.append(sensor)
|
||||
|
||||
sensor_file_lists = {}
|
||||
|
||||
# Loop through all sensors and create the actual and expected file paths
|
||||
for sensor in sensors:
|
||||
if sensor == 'messages':
|
||||
sensor = 'sms'
|
||||
sensor_cap = sensor.upper()
|
||||
elif sensor == 'calls':
|
||||
sensor_cap = sensor.upper()
|
||||
sensor = 'call'
|
||||
else:
|
||||
sensor_cap = sensor.upper()
|
||||
if 'DAY_SEGMENTS' in configs[sensor_cap]:
|
||||
if 'DAY_SEGMENTS' in config[sensor]:
|
||||
sensor_type = []
|
||||
if 'TYPES' in configs[sensor_cap]:
|
||||
for each in configs[sensor_cap]['TYPES']:
|
||||
if 'TYPES' in config[sensor]:
|
||||
for each in config[sensor]['TYPES']:
|
||||
sensor_type.append(each+'_')
|
||||
|
||||
if sensor_type:
|
||||
act_file_list = expand(act_str, pid=configs["PIDS"],
|
||||
sensor = sensor,
|
||||
act_file_list = expand(act_str, pid=config["PIDS"],
|
||||
sensor = config[sensor]["DB_TABLE"],
|
||||
sensor_type = sensor_type,
|
||||
day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
|
||||
exp_file_list = expand(exp_str, pid=configs["PIDS"],
|
||||
sensor = sensor,
|
||||
day_segment = config[sensor]["DAY_SEGMENTS"])
|
||||
exp_file_list = expand(exp_str, pid=config["PIDS"],
|
||||
sensor = config[sensor]["DB_TABLE"],
|
||||
sensor_type = sensor_type,
|
||||
day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
|
||||
day_segment = config[sensor]["DAY_SEGMENTS"])
|
||||
else:
|
||||
act_file_list = expand(act_str, pid=configs["PIDS"],
|
||||
sensor = sensor,
|
||||
act_file_list = expand(act_str, pid=config["PIDS"],
|
||||
sensor = config[sensor]["DB_TABLE"],
|
||||
sensor_type = '',
|
||||
day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
|
||||
exp_file_list = expand(exp_str, pid=configs["PIDS"],
|
||||
sensor = sensor,
|
||||
day_segment = config[sensor]["DAY_SEGMENTS"])
|
||||
exp_file_list = expand(exp_str, pid=config["PIDS"],
|
||||
sensor = config[sensor]["DB_TABLE"],
|
||||
sensor_type = '',
|
||||
day_segment = configs[sensor_cap]["DAY_SEGMENTS"])
|
||||
day_segment = config[sensor]["DAY_SEGMENTS"])
|
||||
|
||||
sensor_file_lists[sensor_cap] = list(zip(act_file_list,exp_file_list))
|
||||
sensor_file_lists[sensor] = list(zip(act_file_list,exp_file_list))
|
||||
|
||||
return sensor_file_lists
|
||||
|
|
|
@ -2,4 +2,4 @@ directory: ./
|
|||
configfile: ./tests/settings/testing_config.yaml
|
||||
snakefile: ./tests/Snakefile
|
||||
cores: 1
|
||||
forcerun: [sms_features, call_features, screen_features]
|
||||
forcerun: [messages_features, call_features, screen_features]
|
|
@ -1,18 +1,20 @@
|
|||
# Valid database table name
|
||||
SENSORS: [messages, calls, screen]
|
||||
#SENSORS: [calls]
|
||||
# Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS.
|
||||
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
|
||||
TABLES_FOR_SENSED_BINS: [messages, calls, screen]
|
||||
|
||||
# Test Participant data to include in the unit testing
|
||||
# You must create a file for each participant
|
||||
# named pXXX containing their device_id
|
||||
# Participants to include in the analysis
|
||||
# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
|
||||
PIDS: [test01, test02]
|
||||
|
||||
|
||||
# Global var with common day segments
|
||||
DAY_SEGMENTS: &day_segments
|
||||
[daily, morning, afternoon, evening, night]
|
||||
|
||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||
SMS:
|
||||
MESSAGES:
|
||||
COMPUTE: True
|
||||
DB_TABLE: messages
|
||||
TYPES : [received, sent]
|
||||
FEATURES:
|
||||
received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
|
||||
|
@ -21,6 +23,8 @@ SMS:
|
|||
|
||||
# Communication call features config, TYPES and FEATURES keys need to match
|
||||
CALLS:
|
||||
COMPUTE: True
|
||||
DB_TABLE: calls
|
||||
TYPES: [missed, incoming, outgoing]
|
||||
FEATURES:
|
||||
missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact]
|
||||
|
@ -28,7 +32,10 @@ CALLS:
|
|||
outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
|
||||
DAY_SEGMENTS: *day_segments
|
||||
|
||||
|
||||
SCREEN:
|
||||
COMPUTE: True
|
||||
DB_TABLE: screen
|
||||
DAY_SEGMENTS: *day_segments
|
||||
REFERENCE_HOUR_FIRST_USE: 0
|
||||
FEATURES_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"]
|
||||
|
|
Loading…
Reference in New Issue